From 5bfff1b56b1e31c54d5b61c71f4f16b5d6ffe57f Mon Sep 17 00:00:00 2001 From: Bede Constantinides Date: Tue, 14 May 2024 08:30:41 +0100 Subject: [PATCH] Ruff pre-commit and related linting fixes --- .pre-commit-config.yaml | 16 +++++------- README.md | 3 ++- src/primaschema/__init__.py | 2 +- src/primaschema/cli.py | 8 ++++-- src/primaschema/lib.py | 50 +++++++++++++++++++------------------ src/primaschema/util.py | 1 + test/test_all.py | 32 +++++++++++------------- 7 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec7f5d3..5707941 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,7 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 - hooks: - - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace -- repo: https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/README.md b/README.md index 7013202..177dc7e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The toolkit for validating and building tiling amplicon PCR primer scheme defini -## Install (Python 3.10+) +## Install (Python 3.11+) ```shell # Latest stable release @@ -20,6 +20,7 @@ pip install ./primaschema git clone https://github.com/pha4ge/primaschema.git cd primaschema pip install --editable '.[dev]' +pre-commit install pytest ``` diff --git a/src/primaschema/__init__.py b/src/primaschema/__init__.py index a33877f..2671e70 100644 --- a/src/primaschema/__init__.py +++ b/src/primaschema/__init__.py @@ -1,6 +1,6 @@ """A toolkit for tiling primer scheme defintions""" -from pathlib import Path +# from pathlib import Path __version__ = "0.2.0" diff --git a/src/primaschema/cli.py b/src/primaschema/cli.py index ce17e96..b2fca9e 100644 --- a/src/primaschema/cli.py +++ b/src/primaschema/cli.py @@ -55,7 +55,9 @@ def 
validate_recursive(root_dir: Path, full: bool = False, force: bool = False): lib.validate_recursive(root_dir=root_dir, full=full, force=force) -def build(scheme_dir: Path, out_dir: Path = Path(), full: bool = False, force: bool = False): +def build( + scheme_dir: Path, out_dir: Path = Path(), full: bool = False, force: bool = False +): """ Build a primer scheme bundle containing info.yml, primer.bed and reference.fasta @@ -67,7 +69,9 @@ def build(scheme_dir: Path, out_dir: Path = Path(), full: bool = False, force: b lib.build(scheme_dir=scheme_dir, out_dir=out_dir, full=full, force=force) -def build_recursive(root_dir: Path, full: bool = False, force: bool = False, nested: bool = False): +def build_recursive( + root_dir: Path, full: bool = False, force: bool = False, nested: bool = False +): """ Recursively build primer scheme bundles in the specified directory diff --git a/src/primaschema/lib.py b/src/primaschema/lib.py index 5e88069..9421262 100644 --- a/src/primaschema/lib.py +++ b/src/primaschema/lib.py @@ -15,6 +15,7 @@ import pandas as pd import yaml from Bio import SeqIO + # from linkml.generators.pydanticgen import PydanticGenerator from linkml.generators.pythongen import PythonGenerator from linkml_runtime.utils.schemaview import SchemaView @@ -49,8 +50,12 @@ def get_primer_schemes_path(): """Locate primer-schemes repo root using environment variable""" env_var = "PRIMER_SCHEMES_PATH" if ( - not env_var in os.environ - or not (Path(os.environ[env_var]).resolve() / Path("schema") / Path("primer_scheme.yml")).exists() + env_var not in os.environ + or not ( + Path(os.environ[env_var]).resolve() + / Path("schema") + / Path("primer_scheme.yml") + ).exists() ): raise RuntimeError( f'Invalid or unset environment variable {env_var} ({os.environ.get(env_var)}).\n\nSet {env_var} to the path of a local copy of the primer-schemes repo to proceed. 
For example, do `git clone https://github.com/pha4ge/primer-schemes` followed by `export {env_var}="/path/to/primer-schemes"`' @@ -133,7 +138,7 @@ def hash_scheme_bed(bed_path: Path, fasta_path: Path) -> str: """ Hash a 6 column scheme.bed file by first converting to 7 column primer.bed """ - logging.info(f"Hashing scheme.bed using reference backfill") + logging.info("Hashing scheme.bed using reference backfill") ref_record = SeqIO.read(fasta_path, "fasta") df = parse_scheme_bed(bed_path) records = df.to_dict("records") @@ -210,14 +215,14 @@ def validate_with_linkml_schema(yaml_path: Path, full: bool = False): schema_view = SchemaView(schema_path) schema_gen = PythonGenerator(schema_view.schema) schema_compiled = schema_gen.compile_module() - data_instance = schema_compiled.PrimerScheme(**data) + schema_compiled.PrimerScheme(**data) # Errors on validation failure else: if not pythonised_schema_path.exists(): run(f"gen-python {schema_path} > {pythonised_schema_path}") logging.info(f"Wrote Pythonised schema to {pythonised_schema_path}") print(run("ls").stdout) PrimerScheme = import_class_from_path(pythonised_schema_path) - data_instance = PrimerScheme(**data) + PrimerScheme(**data) # Errors on validation failure def validate_bed(bed_path: Path, bed_type=Literal["primer", "scheme"]): @@ -255,17 +260,9 @@ def infer_bed_type(bed_path: Path) -> str: def validate(scheme_dir: Path, full: bool = False, force: bool = False): - # schema_path = get_primer_schemes_path() / "schema/scheme_schema.latest.json" logging.info(f"Validating {scheme_dir}") validate_bed(scheme_dir / "primer.bed", bed_type="primer") - # validate_yaml_with_json_schema( - # yaml_path=scheme_dir / "info.yml", schema_path=schema_path - # ) - schema_path = get_primer_schemes_path() / "schema/primer_scheme.yml" - validate_with_linkml_schema( - yaml_path=scheme_dir / "info.yml", - full=full - ) + validate_with_linkml_schema(yaml_path=scheme_dir / "info.yml", full=full) scheme = parse_yaml(scheme_dir / 
"info.yml") existing_primer_checksum = scheme.get("primer_checksum") existing_reference_checksum = scheme.get("reference_checksum") @@ -298,7 +295,7 @@ def validate(scheme_dir: Path, full: bool = False, force: bool = False): logging.info(f"Validation successful for {scheme.get('name')} ") -def validate_recursive(root_dir: Path, force: bool = False): +def validate_recursive(root_dir: Path, full: bool = False, force: bool = False): """Validate all schemes in a directory tree""" schemes_paths = {} for entry in scan(root_dir): @@ -308,11 +305,15 @@ def validate_recursive(root_dir: Path, force: bool = False): schemes_paths[scheme] = scheme_dir for scheme, path in schemes_paths.items(): - validate(scheme_dir=path, force=force) + validate(scheme_dir=path, full=full, force=force) def build( - scheme_dir: Path, out_dir: Path = Path(), full: bool = False, force: bool = False, nested: bool = True + scheme_dir: Path, + out_dir: Path = Path(), + full: bool = False, + force: bool = False, + nested: bool = True, ): """ Build a PHA4GE primer scheme bundle. 
@@ -347,7 +348,9 @@ def build( os.remove("scheme.bed") -def build_recursive(root_dir: Path, full: bool = False, force: bool = False, nested: bool = False): +def build_recursive( + root_dir: Path, full: bool = False, force: bool = False, nested: bool = False +): """Build all schemes in a directory tree""" schemes_paths = {} for entry in scan(root_dir): @@ -430,7 +433,6 @@ def diff(bed1_path: Path, bed2_path: Path, only_positions: bool = False): def show_non_ref_alts(scheme_dir: Path): """Show primer records with sequences not matching the reference sequence""" bed_path = scheme_dir / "primer.bed" - fasta_path = scheme_dir / "reference.fasta" with TemporaryDirectory() as temp_dir: convert_scheme_bed_to_primer_bed( bed_path=scheme_dir / "scheme.bed", @@ -444,11 +446,11 @@ def compute_intervals(bed_path: Path) -> dict[str, dict[str, (int, int)]]: # find primer positions for all primers in the bed file and compute maximum # interval between primers of the same name - primer_name_re = re.compile(r'^(?P<name>.*)_(LEFT|RIGHT)(_.+)?$') - eden_primer_name_re = re.compile(r'^(?P<name>.*_[AB][0-9])(F|R)_\d+$') + primer_name_re = re.compile(r"^(?P<name>.*)_(LEFT|RIGHT)(_.+)?$") + eden_primer_name_re = re.compile(r"^(?P<name>.*_[AB][0-9])(F|R)_\d+$") all_intervals: dict[str, dict[str, (int, int)]] = {} for line in open(bed_path): - line_parts = line.strip().split('\t') + line_parts = line.strip().split("\t") if len(line_parts) < 6: # skip lines that don't have at least 6 fields continue @@ -463,10 +465,10 @@ def compute_intervals(bed_path: Path) -> dict[str, dict[str, (int, int)]]: if not primer_name_re: raise ValueError(f"Invalid primer name {name}") primer_name = primer_match.group("name") - if strand == '+': + if strand == "+": start_pos = int(start) end_pos = -1 - if strand == '-': + if strand == "-": start_pos = sys.maxsize end_pos = int(end) prev_start, prev_end = intervals.get(primer_name, (sys.maxsize, -1)) diff --git a/src/primaschema/util.py b/src/primaschema/util.py index 1540c77..52edf1c
100644 --- a/src/primaschema/util.py +++ b/src/primaschema/util.py @@ -1,5 +1,6 @@ import subprocess + def run(cmd, cwd="./"): # Helper for CLI testing return subprocess.run( cmd, cwd=cwd, shell=True, check=True, text=True, capture_output=True diff --git a/test/test_all.py b/test/test_all.py index aa8c14c..38535de 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -57,8 +57,7 @@ def test_artic_v41_scheme_hash_matches_primer_hash(): def test_eden_v1_schema_full(): lib.validate_with_linkml_schema( - data_dir / "primer-schemes/eden/v1/info.yml", - full = True + data_dir / "primer-schemes/eden/v1/info.yml", full=True ) @@ -95,7 +94,7 @@ def test_validate_fail_five_columns(): def test_validate_recursive(): - run_cmd = "primaschema validate-recursive primer-schemes" + run("primaschema validate-recursive primer-schemes") def test_hash_bed(): @@ -104,12 +103,12 @@ def test_hash_bed(): def test_build_from_primer_bed(): - run_cmd = run("primaschema build primer-schemes/artic/v4.1 --force") + run("primaschema build primer-schemes/artic/v4.1 --force") run("rm -rf artic-v4.1") def test_build_from_scheme_bed(): - run_cmd = run("primaschema build primer-schemes/eden/v1 --force") + run("primaschema build primer-schemes/eden/v1 --force") run("rm -rf eden-v1") @@ -144,24 +143,21 @@ def test_diff(): run_cmd = run( "primaschema diff primer-schemes/midnight/v1/primer.bed primer-schemes/midnight/v2/primer.bed" ) - assert ( - """chrom chromStart chromEnd name poolName strand sequence origin -MN908947.3 27784 27808 SARS-CoV-2_28_LEFT_27837T 2 + TTTGTGCTTTTTAGCCTTTCTGTT bed2""" - == run_cmd.stdout.strip() - ) + assert """chrom chromStart chromEnd name poolName strand sequence origin +MN908947.3 27784 27808 SARS-CoV-2_28_LEFT_27837T 2 + TTTGTGCTTTTTAGCCTTTCTGTT bed2""" == run_cmd.stdout.strip() def test_calculate_intervals(): - all_intervals = lib.compute_intervals(data_dir / "primer-schemes/artic/v4.1/primer.bed") - assert 'MN908947.3' in all_intervals - intervals = 
all_intervals['MN908947.3'] - assert 'SARS-CoV-2_99' in intervals - assert intervals['SARS-CoV-2_99'] == (29452, 29854) + all_intervals = lib.compute_intervals( + data_dir / "primer-schemes/artic/v4.1/primer.bed" + ) + assert "MN908947.3" in all_intervals + intervals = all_intervals["MN908947.3"] + assert "SARS-CoV-2_99" in intervals + assert intervals["SARS-CoV-2_99"] == (29452, 29854) def test_print_intervals(): run_cmd = run("primaschema intervals primer-schemes/artic/v4.1/primer.bed") - assert ( - """MN908947.3\t29452\t29854\tSARS-CoV-2_99\n""" in run_cmd.stdout - ) + assert """MN908947.3\t29452\t29854\tSARS-CoV-2_99\n""" in run_cmd.stdout