Skip to content

Commit

Permalink
[MRG] several minor updates and cleanup (#129)
Browse files Browse the repository at this point in the history
* update versions

* several minor updates

* remove last vestiges of /depth/

* update versions in setup.py
  • Loading branch information
ctb authored Jan 1, 2022
1 parent 93060ec commit 65acc69
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 24 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ channels:
- defaults
dependencies:
- mamba
- snakemake-minimal==6.6.1
- snakemake-minimal==6.12.3
- click>=7,<8
- lxml==4.6.4
- pandas>1,<2
Expand Down
42 changes: 25 additions & 17 deletions genome_grist/conf/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ SAMPLES=config['sample']
print(f'sample: {SAMPLES}', file=sys.stderr)
assert isinstance(SAMPLES, list), "config 'sample' must be a list."

# Validate sample names up front: any name containing a period is rejected.
# Every offending name is reported on stderr before exiting, so all of them
# can be fixed in one pass rather than one per run.
# NOTE(review): presumably periods would clash with the '{sample}.x.{acc}'
# style filename patterns used by the rules in this file -- confirm.
fail = False
for sample in SAMPLES:
    if '.' not in sample:
        continue

    print(f"sample name '{sample}' contains a period; please remove",
          file=sys.stderr)
    fail = True

# Only bail out once the whole list has been checked.
if fail:
    sys.exit(-1)

outdir = config.get('outdir', 'outputs/')
outdir = outdir.rstrip('/')
print('outdir:', outdir, file=sys.stderr)
Expand Down Expand Up @@ -148,7 +158,7 @@ class Checkpoint_GatherResults:

def get_genome_accs(self, sample):
gather_csv = f'{outdir}/genbank/{sample}.x.genbank.gather.csv'
assert os.path.exists(gather_csv)
assert os.path.exists(gather_csv), "gather output does not exist!?"

genome_accs = []
with open(gather_csv, 'rt') as fp:
Expand All @@ -162,8 +172,6 @@ class Checkpoint_GatherResults:
return genome_accs

def __call__(self, w):
global checkpoints

# get 'sample' from wildcards?
if self.samples is None:
return self.do_sample(w)
Expand Down Expand Up @@ -220,8 +228,8 @@ rule download_matching_genomes:
@toplevel
rule map_reads:
input:
expand(f"{outdir}/minimap/depth/{{sample}}.summary.csv", sample=SAMPLES),
expand(f"{outdir}/leftover/depth/{{sample}}.summary.csv", sample=SAMPLES)
expand(f"{outdir}/minimap/{{sample}}.summary.csv", sample=SAMPLES),
expand(f"{outdir}/leftover/{{sample}}.summary.csv", sample=SAMPLES)

@toplevel
rule build_consensus:
Expand Down Expand Up @@ -261,8 +269,8 @@ rule check:
rule zip:
shell: """
rm -f transfer.zip
zip -r transfer.zip {outdir}/leftover/depth/*.summary.csv \
{outdir}/minimap/depth/*.summary.csv {outdir}/*.yaml \
zip -r transfer.zip {outdir}/leftover/*.summary.csv \
{outdir}/minimap/*.summary.csv {outdir}/*.yaml \
{outdir}/genbank/*.csv {outdir}/gathertax/ {outdir}/reports/
"""

Expand Down Expand Up @@ -466,7 +474,7 @@ rule bam_to_depth_wc:
input:
bam = outdir + "/{dir}/{bam}.bam",
output:
depth = outdir + "/{dir}/depth/{bam}.txt",
depth = outdir + "/{dir}/{bam}.depth.txt",
conda: "env/minimap2.yml"
shell: """
samtools depth -aa {input.bam} > {output.depth}
Expand All @@ -477,7 +485,7 @@ rule bam_covered_regions_wc:
input:
bam = outdir + "/{dir}/{bam}.bam",
output:
regions = outdir + "/{dir}/depth/{bam}.regions.bed",
regions = outdir + "/{dir}/{bam}.regions.bed",
conda: "env/covtobed.yml"
shell: """
covtobed {input.bam} -l 100 -m 1 | \
Expand Down Expand Up @@ -508,7 +516,7 @@ rule build_new_consensus_wc:
input:
vcf = outdir + "/{dir}/{sample}.x.{acc}.vcf.gz",
query = ancient("genbank_genomes/{acc}_genomic.fna.gz"),
regions = outdir + "/{dir}/depth/{sample}.x.{acc}.regions.bed",
regions = outdir + "/{dir}/{sample}.x.{acc}.regions.bed",
output:
mask = outdir + "/{dir}/{sample}.x.{acc}.mask.bed",
genomefile = outdir + "/{dir}/{sample}.x.{acc}.fna.gz.sizes",
Expand All @@ -528,9 +536,9 @@ rule build_new_consensus_wc:
# summarize depth into a CSV
rule summarize_samtools_depth_wc:
input:
Checkpoint_GatherResults(outdir + f"/{{dir}}/depth/{{sample}}.x.{{acc}}.txt")
Checkpoint_GatherResults(outdir + f"/{{dir}}/{{sample}}.x.{{acc}}.depth.txt")
output:
csv = f"{outdir}/{{dir}}/depth/{{sample}}.summary.csv"
csv = f"{outdir}/{{dir}}/{{sample}}.summary.csv"
shell: """
python -m genome_grist.summarize_mapping {wildcards.sample} \
{input} -o {output.csv}
Expand Down Expand Up @@ -618,8 +626,8 @@ rule make_taxonomy_notebook_wc:
rule make_mapping_notebook_wc:
input:
nb = srcdir('../notebooks/report-mapping.ipynb'),
all_csv = ancient(f"{outdir}/minimap/depth/{{sample}}.summary.csv"),
depth_csv = ancient(f"{outdir}/leftover/depth/{{sample}}.summary.csv"),
all_csv = ancient(f"{outdir}/minimap/{{sample}}.summary.csv"),
depth_csv = ancient(f"{outdir}/leftover/{{sample}}.summary.csv"),
gather_csv = f'{outdir}/genbank/{{sample}}.x.genbank.gather.csv',
genomes_info_csv = ancient(f"{outdir}/genbank/{{sample}}.genomes.info.csv"),
kernel_set = rules.set_kernel.output,
Expand All @@ -632,8 +640,8 @@ rule make_mapping_notebook_wc:
conda: 'env/papermill.yml'
shell: """
papermill {input.nb} {output.nb} -k genome_grist \
-p sample_id {wildcards.sample:q} -p render '' -p outdir {outdir:q}\
--cwd {params.cwd}
-p sample_id {wildcards.sample:q} -p render '' \
-p outdir {outdir:q} --cwd {params.cwd}
python -m nbconvert {output.nb} --to html --stdout --no-input \
--ExecutePreprocessor.kernel_name=genome_grist > {output.html}
"""
Expand All @@ -657,7 +665,7 @@ rule extract_leftover_reads_wc:
# rule for mapping leftover reads to genomes -> BAM
rule map_leftover_reads_wc:
input:
all_csv = f"{outdir}/minimap/depth/{{sample}}.summary.csv",
all_csv = f"{outdir}/minimap/{{sample}}.summary.csv",
query = ancient(f"genbank_genomes/{{acc}}_genomic.fna.gz"),
leftover_reads_flag = f"{outdir}/.leftover-reads.{{sample}}",
output:
Expand Down
3 changes: 1 addition & 2 deletions genome_grist/conf/env/papermill.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ channels:
- bioconda
- defaults
dependencies:
- papermill>=2.1.2,<3
- papermill>=2.3.3,<3
- notebook>=6,<7
- plotly>=4.9.0,<5
- ipykernel
- matplotlib>=3.4.3,<4
- numpy>=1.21.3,<2
Expand Down
2 changes: 1 addition & 1 deletion genome_grist/conf/env/sourmash.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
- python>=3.7,<3.10
- screed
- sourmash>=4.2.1,<5
- sourmash>=4.2.3,<5
- pip
- pip:
- git+https://github.com/dib-lab/genome-grist.git#egg=genome-grist
4 changes: 2 additions & 2 deletions genome_grist/notebooks/report-mapping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
"source": [
"\n",
"# load mapping CSVs\n",
"all_df = pd.read_csv(f'../../{outdir}/minimap/depth/{sample_id}.summary.csv')\n",
"left_df = pd.read_csv(f'../../{outdir}/leftover/depth/{sample_id}.summary.csv')\n",
"all_df = pd.read_csv(f'../../{outdir}/minimap/{sample_id}.summary.csv')\n",
"left_df = pd.read_csv(f'../../{outdir}/leftover/{sample_id}.summary.csv')\n",
"\n",
"# load gather CSV\n",
"gather_df = pd.read_csv(f'../../{outdir}/genbank/{sample_id}.x.genbank.gather.csv')\n",
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"pytest-runner",
],
use_scm_version={"write_to": "genome_grist/version.py"},
install_requires=["snakemake==6.6.1", "click>=7,<8", "lxml==4.6.4",
install_requires=["snakemake==6.12.3", "click>=7,<8", "lxml==4.6.4",
"pandas>1,<2"],
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 65acc69

Please sign in to comment.