Skip to content

Commit

Permalink
Merge pull request #1244: Add type checking (mypy)
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin authored Jun 30, 2023
2 parents 45f54da + a5c173a commit 9e8c9e0
Show file tree
Hide file tree
Showing 14 changed files with 115 additions and 10 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@

* export, frequencies, refine, traits: Add a new flag `--metadata-id-columns` to customize the possible metadata ID columns. Previously, this was only available in `augur filter`. [#1240][] (@victorlin)

### Bug fixes

* parse: Fix a bug where `--fix-dates` was always applied, with a default of `--fix-dates=monthfirst`. Now, running without `--fix-dates` will leave dates as-is. [#1247][] (@victorlin)

[#1240]: https://github.com/nextstrain/augur/pull/1240
[#1247]: https://github.com/nextstrain/augur/issues/1247

## 22.0.3 (14 June 2023)

Expand Down
6 changes: 3 additions & 3 deletions augur/filter/include_exclude_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
strains_to_keep = set(metadata.index.values)
strains_to_filter = []
strains_to_force_include = []
distinct_strains_to_force_include = set()
distinct_strains_to_force_include: Set = set()

# Track strains that should be included regardless of filters.
for include_function, include_kwargs in include_by:
Expand Down Expand Up @@ -713,9 +713,9 @@ def apply_filters(metadata, exclude_by: List[FilterOption], include_by: List[Fil
if filter_function is filter_by_query:
try:
# pandas ≥1.5.0 only
UndefinedVariableError = pd.errors.UndefinedVariableError
UndefinedVariableError = pd.errors.UndefinedVariableError # type: ignore
except AttributeError:
UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError
UndefinedVariableError = pd.core.computation.ops.UndefinedVariableError # type: ignore
if isinstance(e, UndefinedVariableError):
raise AugurError(f"Query contains a column that does not exist in metadata.") from e
raise AugurError(f"Error when applying query. Ensure the syntax is valid per <https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#indexing-query>.") from e
Expand Down
2 changes: 1 addition & 1 deletion augur/filter/subsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def _calculate_fractional_sequences_per_group(
0.4844
"""
lo = 1e-5
hi = target_max_value
hi = float(target_max_value)

while (hi / lo) > 1.1:
mid = (lo + hi) / 2
Expand Down
7 changes: 6 additions & 1 deletion augur/io/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,10 +456,15 @@ def write_records_to_tsv(records, output_file):

def _get_delimiter(path: str, valid_delimiters: Iterable[str]):
"""Get the delimiter of a file given a list of valid delimiters."""

for delimiter in valid_delimiters:
if len(delimiter) != 1:
raise AugurError(f"Delimiters must be single-character strings. {delimiter!r} does not satisfy that condition.")

with open_file(path) as file:
try:
# Infer the delimiter from the first line.
return csv.Sniffer().sniff(file.readline(), valid_delimiters).delimiter
return csv.Sniffer().sniff(file.readline(), "".join(valid_delimiters)).delimiter
except csv.Error as error:
# This assumes all csv.Errors imply a delimiter issue. That might
# change in a future Python version.
Expand Down
2 changes: 1 addition & 1 deletion augur/io/shell_command_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from signal import SIGKILL
except ImportError:
# A non-POSIX platform
SIGKILL = None
SIGKILL = None # type: ignore[assignment]


def run_shell_command(cmd, raise_errors=False, extra_env=None):
Expand Down
2 changes: 1 addition & 1 deletion augur/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def parse_sequence(sequence, fields, strain_key="strain", separator="|", prettif
etal='lower' if field.startswith('author') else None)

# parse dates and convert to a canonical format
if fix_dates and 'date' in metadata:
if fix_dates_format and 'date' in metadata:
metadata['date'] = fix_dates(
metadata['date'],
dayfirst=fix_dates_format=='dayfirst'
Expand Down
3 changes: 3 additions & 0 deletions augur/titer_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Prevent mypy from erroring on "flu" not being defined.
# mypy: disable-error-code="name-defined"

import os
import logging
import numpy as np
Expand Down
4 changes: 2 additions & 2 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
**computer**, if determinable, otherwise the *fallback* number (which
defaults to 1).
"""
try:
if hasattr(os, "sched_getaffinity"):
# Note that this is the correct function to use, not os.cpu_count(), as
# described in the latter's documentation.
#
Expand All @@ -290,7 +290,7 @@ def available_cpu_cores(fallback: int = 1) -> int:
# naively use all 24, we'd end up with two threads across the 12 cores.
# This would degrade performance rather than improve it!
return len(os.sched_getaffinity(0))
except:
else:
# cpu_count() returns None if the value is indeterminable.
return os.cpu_count() or fallback

Expand Down
2 changes: 1 addition & 1 deletion augur/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def format_path(path: Iterable[Union[str, int]]) -> str:
'.x.y[42].z'
"""
def valid_identifier(x) -> bool:
return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x)
return isinstance(x, str) and re.search(r'^[a-zA-Z$_][a-zA-Z0-9_$]*$', x) is not None

def fmt(x) -> str:
return (f"[{x}]" if isinstance(x, int) else
Expand Down
13 changes: 13 additions & 0 deletions docs/contribute/DEV_DOCS.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,19 @@ Troubleshooting tip: As tests run on the development code in the augur repositor
We use continuous integration with GitHub Actions to run tests on every pull request submitted to the project.
We use [codecov](https://codecov.io/) to automatically produce test coverage for new contributions and the project as a whole.

### Type annotations

Our goal is to gradually add [type annotations][] to our code so that we can catch errors earlier and be explicit about the interfaces expected and provided. Annotation pairs well with the functional approach taken by the package.

During development you can run static type checks using [mypy][]:

$ mypy
# No output is good!

There are also many [editor integrations for mypy][].

[editor integrations for mypy]: https://github.com/python/mypy#integrations

### Releasing

Versions for this project, Augur, from 3.0.0 onwards aim to follow the
Expand Down
49 changes: 49 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[mypy]
# Don't set python_version. Instead, use the default behavior of checking for
# compatibility against the version of Python used to run mypy.
files = augur/

# Require functions with an annotated return type to be explicit about
# potentially returning None (via Optional[…]).
strict_optional = False

# In the future maybe we can contribute typing stubs for these modules (either
# complete stubs in the python/typeshed repo or partial stubs just in
# this repo), but for now that's more work than we want to invest. These
# sections let us ignore missing stubs for specific modules without hiding all
# missing errors like (--ignore-missing-imports).
[mypy-treetime.*]
ignore_missing_imports = True

[mypy-isodate.*]
ignore_missing_imports = True

[mypy-Bio.*]
ignore_missing_imports = True

[mypy-BCBio.*]
ignore_missing_imports = True

[mypy-matplotlib.*]
ignore_missing_imports = True

[mypy-seaborn.*]
ignore_missing_imports = True

[mypy-cvxopt.*]
ignore_missing_imports = True

[mypy-ipdb.*]
ignore_missing_imports = True

[mypy-jsonschema.*]
ignore_missing_imports = True

[mypy-pyfastx.*]
ignore_missing_imports = True

[mypy-networkx.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@
"cram >=0.7",
"deepdiff >=4.3.2",
"freezegun >=0.3.15",
"mypy",
"nextstrain-sphinx-theme >=2022.5",
"pandas-stubs >=1.0.0, ==1.*",
"pylint >=1.7.6",
"pytest >=5.4.1",
"pytest-cov >=2.8.1",
Expand All @@ -83,6 +85,8 @@
"sphinx-markdown-tables >= 0.0.9",
"sphinx-rtd-theme >=0.4.3",
"sphinx-autodoc-typehints >=1.21.4",
"types-jsonschema >=3.0.0, ==3.*",
"types-setuptools",
"wheel >=0.32.3",
"ipdb >=0.10.1"
]
Expand Down
17 changes: 17 additions & 0 deletions tests/functional/parse.t
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,21 @@ Error on the first duplicate.
ERROR: Duplicate found for 'SEQ1'.
[2]

Run without --fix-dates. The date is left unchanged.

$ cat >$TMP/data.fasta <<~~
> >SEQ1|05/01/2020
> AAA
> ~~
$ ${AUGUR} parse \
> --sequences $TMP/data.fasta \
> --output-sequences "$TMP/sequences.fasta" \
> --output-metadata "$TMP/metadata.tsv" \
> --fields strain date

$ cat "$TMP/metadata.tsv"
strain date
SEQ1 05/01/2020
$ rm -f "$TMP/metadata.tsv"

$ popd > /dev/null
9 changes: 9 additions & 0 deletions tests/test_mypy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from pathlib import Path
from subprocess import run

topdir = Path(__file__).resolve().parent.parent

def test_mypy():
# Check the exit status ourselves for nicer test output on failure
result = run(["mypy"], cwd = topdir)
assert result.returncode == 0, "mypy exited with errors"

0 comments on commit 9e8c9e0

Please sign in to comment.