Skip to content

Commit

Permalink
Merge pull request #15 from mpi2/refactor
Browse files Browse the repository at this point in the history
Rename module and reorder libraries
  • Loading branch information
marinak-ebi authored Oct 23, 2024
2 parents 64720e0 + fd7ba99 commit dc22114
Show file tree
Hide file tree
Showing 21 changed files with 87 additions and 82 deletions.
1 change: 0 additions & 1 deletion impc_api_helper/MANIFEST.in

This file was deleted.

19 changes: 0 additions & 19 deletions impc_api_helper/impc_api_helper/temp.py

This file was deleted.

2 changes: 0 additions & 2 deletions impc_api_helper/pytest.ini

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The package is build using [setuptools](https://setuptools.pypa.io/en/latest/use


```
from impc_api_helper import solr_request
from impc_api import solr_request
num_found, df = solr_request( core='genotype-phenotype', params={
'q': '*:*',
'rows': 10
Expand All @@ -23,7 +23,7 @@ num_found, df = solr_request( core='genotype-phenotype', params={
## Installing the package for development
We use [pytest](https://docs.pytest.org/en/stable/) for testing. To install in dev mode, follow [steps 1 and 2](#installing-the-package-for-use) above and then:

3. Install the package running `pip install '.[dev]'`
3. Install the package running `pip install -e .`
This should install `pytest` and enable you to run tests:

```
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions impc_module/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include impc_api/utils/core_fields.json
44 changes: 26 additions & 18 deletions impc_api_helper/README.md → impc_module/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# IMPC_API_HELPER
`impc_api_helper` is a python package.
# IMPC_API
`impc_api` is a python package.

The functions in this package are intended for use on a Jupyter Notebook.
1. **Create a virtual environment (optional but recommended)**:
On Mac:
`python3 -m venv .venv`
`source .venv/bin/activate`

2. **Install the package**: run `pip install impc_api`
3. **Try it out**: Create a [Jupyter Notebook](https://jupyter.org/install#jupyter-notebook) and try some of the examples below:
## Installing the package for the first time
1. Clone the repository and navigate into it, then into the package directory until you can see `setup.py` and `pyproject.toml`.
2. Run `python3 -m build`. This builds the package; a couple of new files/folders will appear.
Expand All @@ -13,13 +19,15 @@ The functions in this package are intended for use on a Jupyter Notebook.
The available functions can be imported as:

```
from impc_api_helper import solr_request, batch_solr_request
from impc_api import solr_request, batch_solr_request
```

## 1. Solr request
The most basic request to the IMPC solr API
```
num_found, df = solr_request( core='genotype-phenotype', params={
num_found, df = solr_request(
core='genotype-phenotype',
params={
'q': '*:*',
'rows': 10,
'fl': 'marker_symbol,allele_symbol,parameter_stable_id'
Expand All @@ -32,38 +40,41 @@ num_found, df = solr_request( core='genotype-phenotype', params={

```
num_found, df = solr_request(
core="genotype-phenotype",
params={
core="genotype-phenotype",
params={
"q": "*:*",
"rows": 0,
"facet": "on",
"facet.field": "zygosity",
"facet.limit": 15,
"facet.mincount": 1,
},
)
}
)
```

### b. Solr request validation
A common pitfall when writing a query is the misspelling of `core` and `fields` arguments. For this, we have included a `validate` argument that raises a warning when these values are not as expected. Note this does not prevent you from executing a query; it just alerts you to a potential issue.


#### Core validation
```
num_found, df = solr_request( core='invalid_core', params={
num_found, df = solr_request(
core='invalid_core',
params={
'q': '*:*',
'rows': 10
},
validate=True
)
> InvalidCoreWarning: Invalid core: "invalid_core", select from the available cores:
> dict_keys(['experiment', 'genotype-phenotype', 'impc_images', 'phenodigm', 'statistical-result']))
> dict_keys(['experiment', 'genotype-phenotype', 'impc_images', 'phenodigm', 'statistical-result'])
```

#### Field list validation
```
num_found, df = solr_request( core='genotype-phenotype', params={
num_found, df = solr_request(
core='genotype-phenotype',
params={
'q': '*:*',
'rows': 10,
'fl': 'invalid_field,marker_symbol,allele_symbol'
Expand Down Expand Up @@ -126,7 +137,7 @@ Pass the list to the `field_list` param and specify the type of `fl` in `field_t

```
# List of gene symbols
genes = ["Zfp580","Firrm","Gpld1","Mbip"]
genes = ["Zfp580", "Firrm", "Gpld1", "Mbip"]
df = batch_solr_request(
core='genotype-phenotype',
Expand All @@ -140,11 +151,11 @@ df = batch_solr_request(
)
print(df.head())
```
This too can be downloaded
This can be downloaded too:

```
# List of gene symbols
genes = ["Zfp580","Firrm","Gpld1","Mbip"]
genes = ["Zfp580", "Firrm", "Gpld1", "Mbip"]
df = batch_solr_request(
core='genotype-phenotype',
Expand All @@ -159,6 +170,3 @@ df = batch_solr_request(
)
print(df.head())
```



File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
from IPython.display import display
import json
import warnings
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm
from .solr_request import solr_request
from pathlib import Path
import warnings
from impc_api_helper.utils.warnings import (
from IPython.display import display

from impc_api.utils.validators import DownloadFormatValidator
from impc_api.utils.warnings import (
warning_config,
RowsParamIgnored,
UnsupportedDownloadFormatError,
)
from impc_api_helper.utils.validators import DownloadFormatValidator

from .solr_request import solr_request

# Initialise warning config
warning_config()


def batch_solr_request(
core, params, download=False, batch_size=5000, filename="batch_request"
):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from IPython.display import display
from tqdm import tqdm
import pandas as pd
import requests
from impc_api_helper.utils.validators import CoreParamsValidator
from IPython.display import display
from tqdm import tqdm

from impc_api.utils.validators import CoreParamsValidator

# Display the whole dataframe <15
pd.set_option("display.max_rows", 15)
pd.set_option("display.max_columns", None)


# Create helper function
def solr_request(core, params, silent=False, validate=False):
"""Performs a single Solr request to the IMPC Solr API.
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
from pydantic import BaseModel, model_validator, field_validator
"""
This module provides validation for core fields and download formats using Pydantic models.
Classes:
- ValidationJson: Loads and validates core fields from a JSON configuration file.
- CoreParamsValidator: Validates core names and associated fields (fl), issuing warnings for invalid inputs.
- DownloadFormatValidator: Validates the download format (wt) to ensure it is supported (json or csv).
Functions:
- get_fields(fields: str) -> List[str]: Parses a comma-separated string of field names into a list.
Custom Exceptions:
- InvalidCoreWarning: Raised for invalid core names.
- InvalidFieldWarning: Raised for unexpected field names.
- UnsupportedDownloadFormatError: Raised for unsupported download formats.
"""

import json
from typing import List, Dict
from pathlib import Path
import warnings
from dataclasses import dataclass, field
from impc_api_helper.utils.warnings import (
from pathlib import Path
from typing import List, Dict
from pydantic import BaseModel, model_validator, field_validator
from impc_api.utils.warnings import (
warning_config,
InvalidCoreWarning,
InvalidFieldWarning,
Expand All @@ -14,7 +31,6 @@
# Initialise warning config
warning_config()


# Dataclass for the json validator
@dataclass
class ValidationJson:
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions impc_api_helper/pyproject.toml → impc_module/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "impc_api_helper"
name = "impc_api"
version = "0.1.0"
description = "A package to facilitate making API requests to the IMPC Solr API"
authors = [
Expand All @@ -27,7 +27,7 @@ dev = [
]

[tool.setuptools.packages.find]
include = ["impc_api_helper", "impc_api_helper.*"]
include = ["impc_api", "impc_api.*"]

[project.urls]
"Homepage" = "https://github.com/mpi2/impc-data-api-workshop"
Expand Down
2 changes: 2 additions & 0 deletions impc_module/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
pythonpath = impc_api
4 changes: 2 additions & 2 deletions impc_api_helper/setup.py → impc_module/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@


setup(
name='impc_api_helper',
name='impc_api',
version='0.1.0',
description='A package to facilitate making API request to the IMPC Solr API',
author='MPI2, Marina Kan, Diego Pava',
url='https://github.com/mpi2/impc-data-api-workshop',
packages=find_packages(include=["impc_api_helper", "impc_api_helper.*"]),
packages=find_packages(include=["impc_api", "impc_api.*"]),
include_package_data=True,
install_requires=[
'pandas>=2.2.0',
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
import pytest
import json
from pathlib import Path
from unittest.mock import patch, call, Mock
from impc_api_helper.batch_solr_request import (

import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

from impc_api.batch_solr_request import (
batch_solr_request,
_batch_solr_generator,
solr_request,
_batch_to_df,
_solr_downloader,
_read_downloaded_file,
)
from impc_api_helper.utils.warnings import (
from impc_api.utils.warnings import (
RowsParamIgnored,
UnsupportedDownloadFormatError,
)
import json
import pandas as pd
from pandas.testing import assert_frame_equal


# When rows is passed to batch solr request, a warning is raised.
# Let's ignore this warning in all tests except the one that asserts the warning
pytestmark = pytest.mark.filterwarnings(
"ignore::impc_api_helper.utils.warnings.RowsParamIgnored"
"ignore::impc_api.utils.warnings.RowsParamIgnored"
)


Expand Down Expand Up @@ -51,15 +52,15 @@ def common_params(self):
# solr_request will be mocked with different values for numFound, therefore it is passed as param
@pytest.fixture
def mock_solr_request(self, request):
with patch("impc_api_helper.batch_solr_request.solr_request") as mock:
with patch("impc_api.batch_solr_request.solr_request") as mock:
# Mock expected return content of the solr_request (numFound and df)
mock.return_value = (request.param, pd.DataFrame())
yield mock

# Fixture mocking _batch_to_df
@pytest.fixture
def mock_batch_to_df(self):
with patch("impc_api_helper.batch_solr_request._batch_to_df") as mock:
with patch("impc_api.batch_solr_request._batch_to_df") as mock:
# Mock expected return content of the _batch_to_df (pd.DataFrame)
mock.return_value = pd.DataFrame()
yield mock
Expand Down Expand Up @@ -157,13 +158,13 @@ def test_batch_solr_request_download_false_large_request(
# Fixture mocking _batch_solr_generator
@pytest.fixture
def mock_batch_solr_generator(self):
with patch("impc_api_helper.batch_solr_request._batch_solr_generator") as mock:
with patch("impc_api.batch_solr_request._batch_solr_generator") as mock:
yield mock

# Fixture mocking _solr_downloader. Yields a tmp_path to write a file for the duration of the test.
@pytest.fixture
def mock_solr_downloader(self, tmp_path):
with patch("impc_api_helper.batch_solr_request._solr_downloader") as mock:
with patch("impc_api.batch_solr_request._solr_downloader") as mock:
temp_dir = Path(tmp_path) / "temp_dir"
temp_dir.mkdir()
yield mock
Expand Down Expand Up @@ -377,7 +378,7 @@ def test_batch_solr_request_multiple_fields(

# Test the warning when params["rows"] is passed
@pytest.mark.filterwarnings(
"default::impc_api_helper.utils.warnings.RowsParamIgnored"
"default::impc_api.utils.warnings.RowsParamIgnored"
)
@pytest.mark.parametrize("mock_solr_request", [10000], indirect=True)
def test_param_rows_warning(core, common_params, mock_solr_request):
Expand Down Expand Up @@ -408,7 +409,7 @@ def mock_solr_request_generator(self, request):
"""Patches solr_request for _batch_to_df _batch_solr_generator producing a df dynamically.
Creates a df in chunks (row by row) mocking incoming batches of responses.
"""
with patch("impc_api_helper.batch_solr_request.solr_request") as mock:
with patch("impc_api.batch_solr_request.solr_request") as mock:
# Call the generator
data_generator = self.data_generator()

Expand Down Expand Up @@ -473,7 +474,7 @@ def test_batch_to_df(
# Fixture to mock the requests module
@pytest.fixture
def mock_requests_get(self, request):
with patch("impc_api_helper.batch_solr_request.requests.get") as mock_get:
with patch("impc_api.batch_solr_request.requests.get") as mock_get:
# Capture the format of the response
wt = request.param["wt"]
mock_get.return_value.format = wt
Expand Down Expand Up @@ -579,7 +580,7 @@ def test_batch_solr_generator(
# Fixture to mock requests.get returning a status code.
@pytest.fixture
def mock_requests_get_error(self, request):
with patch("impc_api_helper.batch_solr_request.requests.get") as mock_get:
with patch("impc_api.batch_solr_request.requests.get") as mock_get:
mock_get.return_value.status_code = request.param
yield mock_get

Expand Down
File renamed without changes.
Loading

0 comments on commit dc22114

Please sign in to comment.