Skip to content

Commit

Permalink
Fixed all tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Anna Sintsova committed Jan 26, 2024
1 parent 5fd1b48 commit 87d9827
Show file tree
Hide file tree
Showing 19 changed files with 884 additions and 351 deletions.
46 changes: 21 additions & 25 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from mbarq.core import Barcode

root = Path("tests/mbarq_test_data")
#root = Path("/Users/ansintsova/Downloads/test_mbarq/mbarq_test_data")

@pytest.fixture
def tn5_structure():
    """Fixture returning the Tn5 structure string."""
    structure = 'B17N13GTGTATAAGAGACAG'
    return structure
Expand Down Expand Up @@ -42,40 +42,36 @@ def test_gff():


@pytest.fixture
def count_test_data():
    """Return ``(seq_file, map_file)`` paths used as counting inputs.

    Both paths are built under ``root / "counting"``:

    * ``seq_file`` -- ``dnaid1315_124_subsample.fasta.gz``
    * ``map_file`` -- ``library.annotated.csv``
    """
    counting_dir = root / "counting"
    seq_file = counting_dir / "dnaid1315_124_subsample.fasta.gz"
    map_file = counting_dir / "library.annotated.csv"
    return (seq_file, map_file)


@pytest.fixture
def count_test_data_wish():
    """Return ``(seq_file, map_file)`` as relative path strings.

    The paths are plain strings (not ``Path`` objects) pointing into
    ``./tests/test_files``.
    """
    # Plain literals: the original f-prefix had no placeholders to format.
    seq_file = './tests/test_files/LibraryA_pilot2.fq.gz'
    map_file = './tests/test_files/20210520_BarcodeList.csv'
    return (seq_file, map_file)
# @pytest.fixture
# def count_test_data_wish():
# seq_file = f'./tests/test_files/LibraryA_pilot2.fq.gz'
# map_file = f'./tests/test_files/20210520_BarcodeList.csv'
# return (seq_file, map_file)


@pytest.fixture
def merge_test_data():
    """Return ``(count_files, 'Name')`` for the merge tests.

    ``count_files`` is the list of every ``*_mbarq_counts.csv`` path found
    directly under ``root / "counting"``; the second element is the literal
    string ``'Name'``.
    """
    # glob() yields paths in filesystem order; the list is not sorted.
    count_files = list((root / "counting").glob("*_mbarq_counts.csv"))
    return count_files, 'Name'

@pytest.fixture
def analysis_test_data():
    """Return the eight analysis input paths as a tuple.

    Callers unpack the result positionally, so the order matters:

    0. ``control1col``
    1. ``control2col``
    2. ``control3col``
    3. ``control2col_short``
    4. ``merged_counts``
    5. ``sample_data``
    6. ``no_wt``
    7. ``control3bc``

    All paths are built under ``root / "analysis"``.
    """
    analysis_dir = root / "analysis"
    control1col = analysis_dir / "controls_1col.csv.gz"
    control2col = analysis_dir / "controls_2col.csv.gz"
    control3col = analysis_dir / "controls_3col.csv.gz"
    control2col_short = analysis_dir / "controls_2col_short.csv.gz"
    merged_counts = analysis_dir / "example_mbarq_merged_counts.csv.gz"
    # The sample sheet is the only input that is not gzip-compressed.
    sample_data = analysis_dir / "example_sample_data.csv"
    no_wt = analysis_dir / "controls_3col_no_wt.csv.gz"
    control3bc = analysis_dir / "controls_3bc.csv.gz"
    return (
        control1col,
        control2col,
        control3col,
        control2col_short,
        merged_counts,
        sample_data,
        no_wt,
        control3bc,
    )

@pytest.fixture
def dnaid1315_expected_outcomes():
    """Fixture returning the ``dnaid1315/expected_outcomes`` directory under ``root``."""
    expected_dir = root / "dnaid1315" / "expected_outcomes"
    return expected_dir

def capture(command_str):
command = shlex.split(command_str)
Expand Down
Binary file added tests/mbarq_test_data/analysis/controls_1col.csv.gz
Binary file not shown.
Binary file added tests/mbarq_test_data/analysis/controls_2col.csv.gz
Binary file not shown.
Binary file not shown.
Binary file added tests/mbarq_test_data/analysis/controls_3bc.csv.gz
Binary file not shown.
Binary file added tests/mbarq_test_data/analysis/controls_3col.csv.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/mbarq_test_data/counting/cf1_mbarq_counts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
barcode,barcode_count,number_of_reads,insertion_site,chr,strand,multimap,ID,Name,locus_tag,distance_to_feature
TGTTTTGGTAACGCTGC,26486,6210.0,3041118.0,FQ312003.1,plus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
ACGCGCCAGACTTACGC,19424,12303.0,45058.0,HE654725.1,minus,False,gene-SL1344_P2_0056,traY,SL1344_P2_0056,0.0
TATCGAACCACATCATA,18368,5609.0,3040903.0,FQ312003.1,minus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
TCTGCAACGAGTTCAAC,15462,14942.0,3831164.0,FQ312003.1,minus,False,gene-SL1344_3591,yhjV,SL1344_3591,0.0
GATAGCTTGATGACGCA,14535,10502.0,4244139.0,FQ312003.1,minus,False,gene-SL1344_3959,SL1344_3959,SL1344_3959,0.0
CTTCACTGTCATACGAA,14052,8031.0,3440921.0,FQ312003.1,plus,False,gene-SL1344_3223,garD,SL1344_3223,0.0
AGATCGCTGCTCGGGCG,13661,11041.0,4327815.0,FQ312003.1,minus,False,gene-SL1344_4046,SL1344_4046,SL1344_4046,0.0
AGATAACGAAACCACAC,12611,9589.0,4556459.0,FQ312003.1,plus,False,gene-SL1344_4230,yjdB,SL1344_4230,0.0
AAAACCTCCCTGCCCAT,12571,123613.0,7626.0,HE654726.1,minus,False,gene-SL1344_P3_0013,strB,SL1344_P3_0013,0.0
10 changes: 10 additions & 0 deletions tests/mbarq_test_data/counting/cf2_mbarq_counts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
barcode,barcode_count,number_of_reads,insertion_site,chr,strand,multimap,ID,Name,locus_tag,distance_to_feature
TGTTTTGGTAACGCTGC,23119,6210.0,3041118.0,FQ312003.1,plus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
ACGCGCCAGACTTACGC,18312,12303.0,45058.0,HE654725.1,minus,False,gene-SL1344_P2_0056,traY,SL1344_P2_0056,0.0
TATCGAACCACATCATA,17041,5609.0,3040903.0,FQ312003.1,minus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
TCTGCAACGAGTTCAAC,15061,14942.0,3831164.0,FQ312003.1,minus,False,gene-SL1344_3591,yhjV,SL1344_3591,0.0
ACGCAGACCCTCACTTT,13683,237063.0,5151.0,HE654726.1,minus,False,gene-SL1344_P3_0008,mobA,SL1344_P3_0008,0.0
CTTCACTGTCATACGAA,12879,8031.0,3440921.0,FQ312003.1,plus,False,gene-SL1344_3223,garD,SL1344_3223,0.0
GATAGCTTGATGACGCA,12579,10502.0,4244139.0,FQ312003.1,minus,False,gene-SL1344_3959,SL1344_3959,SL1344_3959,0.0
AGATCGCTGCTCGGGCG,11964,11041.0,4327815.0,FQ312003.1,minus,False,gene-SL1344_4046,SL1344_4046,SL1344_4046,0.0
AGATAACGAAACCACAC,11722,9589.0,4556459.0,FQ312003.1,plus,False,gene-SL1344_4230,yjdB,SL1344_4230,0.0
10 changes: 10 additions & 0 deletions tests/mbarq_test_data/counting/cf3_mbarq_counts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
barcode,barcode_count,number_of_reads,insertion_site,chr,strand,multimap,ID,Name,locus_tag,distance_to_feature
TGTTTTGGTAACGCTGC,59248,6210.0,3041118.0,FQ312003.1,plus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
TATCGAACCACATCATA,51787,5609.0,3040903.0,FQ312003.1,minus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
GTGACTCCGTCCAACAG,28086,3173.0,3040751.0,FQ312003.1,minus,False,gene-SL1344_2855,hilD,SL1344_2855,0.0
TCTGCAACGAGTTCAAC,22450,14942.0,3831164.0,FQ312003.1,minus,False,gene-SL1344_3591,yhjV,SL1344_3591,0.0
AGAAGGAGAGCGAATAT,21138,7611.0,3034763.0,FQ312003.1,plus,False,gene-SL1344_2847,hilC,SL1344_2847,0.0
CTTCACTGTCATACGAA,20477,8031.0,3440921.0,FQ312003.1,plus,False,gene-SL1344_3223,garD,SL1344_3223,0.0
TAAACAATGTACATAGA,19519,6793.0,3043598.0,FQ312003.1,minus,False,gene-SL1344_2856,hilA,SL1344_2856,0.0
GATAGCTTGATGACGCA,18396,10502.0,4244139.0,FQ312003.1,minus,False,gene-SL1344_3959,SL1344_3959,SL1344_3959,0.0
GAGCTAACCGATAACGG,17262,18055.0,4000966.0,FQ312003.1,plus,False,gene-SL1344_3745,SL1344_3745,SL1344_3745,0.0
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/mbarq_test_data/counting/library.annotated.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
barcode,chr,insertion_site,abundance_in_mapping_library,gene_start,gene_end,gene_strand,ID,Name,locus_tag,distance_to_feature,percentile
CGTGACAAGCCACTCGG,FQ312003.1,15210,8938,15014,15961,+,gene-SL1344_0014,SL1344_0014,SL1344_0014,0,0.21
CCTGTCGCGATAGCTTG,FQ312003.1,15213,4867,15014,15961,+,gene-SL1344_0014,SL1344_0014,SL1344_0014,0,0.21
ATGAAAAGTATGAATCC,FQ312003.1,16097,9253,16088,16432,+,gene-SL1344_0015,SL1344_0015,SL1344_0015,0,0.03
CGTCAGGGCAGCGAACA,FQ312003.1,18337,3622,18083,19966,+,gene-SL1344_0018,chiA,SL1344_0018,0,0.13
TGTATTGCCCAATAATG,FQ312003.1,19222,6981,18083,19966,+,gene-SL1344_0018,chiA,SL1344_0018,0,0.6
TGTGCGGTGGCTATCAC,FQ312003.1,21143,4287,20058,23054,+,gene-SL1344_0019,SL1344_0019,SL1344_0019,0,0.36
AAACCGGTGCCTGGTGT,FQ312003.1,22783,96,20058,23054,+,gene-SL1344_0019,SL1344_0019,SL1344_0019,0,0.91
AGTAAGCGGCCCATAGA,FQ312003.1,22885,2248,20058,23054,+,gene-SL1344_0019,SL1344_0019,SL1344_0019,0,0.94
TGGTCCCGGTCCCGGAG,FQ312003.1,23616,4559,23335,24039,+,gene-SL1344_0020,SL1344_0020,SL1344_0020,0,0.4
CGGAAAGGAGGACCTGG,FQ312003.1,3049213,8863,3047255,3049312,-,gene-SL1344_2861,sipA,SL1344_2861,0,0.05
GAAGTCTGGAAAACATT,FQ312003.1,3999654,9023,3998699,3999891,-,gene-SL1344_3741,nepI,SL1344_3741,0,0.2
TCGCGATGTAATATATA,FQ312003.1,4246978,7934,4246448,4247140,-,gene-SL1344_3962,ompL,SL1344_3962,0,0.23
AACAAGACCGAAATGCG,HE654725.1,70268,11044,70132,70383,+,gene-SL1344_P2_0081,shfB,SL1344_P2_0081,0,0.54
59 changes: 25 additions & 34 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import subprocess
import shlex
from pathlib import Path
from test_utils import assert_files_are_same
import pandas as pd


def capture(command_str):
Expand All @@ -19,54 +19,45 @@ def to_str(bytes_or_str):
value = bytes_or_str
return value

OUTDIR= "/nfs/cds-peta/exports/biol_micro_cds_gr_sunagawa/scratch/ansintsova/Projects_NCCR/hardt/nguyenb/tnseq/scratch/tmp"


def test_cli_analysis(analysis_test_data_tn5, tmpdir, dnaid1315_expected_outcomes):
    """Run ``mbarq analyze`` with controls and a batch column, then compare
    the produced RRA results file with the stored expected file.
    """
    # The fixture returns eight paths; unpacking only seven names raises
    # ValueError before the command is ever run.
    _, _, _, controls, count_file, sample_file, _, _ = analysis_test_data_tn5
    treat_col, batch_col, bline = 'day', 'experiment', 'd0'
    cmd_str = (
        f'mbarq analyze -i {count_file} -s {sample_file} '
        f'-c {controls} --treatment_column {treat_col} '
        f'--batch_column {batch_col} --baseline {bline} '
        f' -o {tmpdir} -g Name -n cli_analysis_test1 '
    )
    subprocess.call(shlex.split(cmd_str))
    actual_rra = tmpdir.join("cli_analysis_test1_rra_results.csv")
    expected_rra = dnaid1315_expected_outcomes / "test_process_results_rra_results.csv"
    assert_files_are_same(actual_rra, expected_rra)


def test_cli_analysis_no_batch(analysis_test_data, tmpdir):
    """Run ``mbarq analyze`` with controls but no batch column and spot-check
    a sample of rows in the produced RRA results file.

    ``sample_results`` maps column name -> {row index -> expected value};
    every listed cell must equal the value read back from the CSV.
    """
    _, _, _, controls, count_file, sample_file, _, _ = analysis_test_data
    treat_col, bline = 'day', 'd0'
    cmd_str = (
        f'mbarq analyze -i {count_file} -s {sample_file} '
        f'-c {controls} --treatment_column {treat_col} '
        f'--baseline {bline} '
        f' -o {tmpdir} -g Name -n cli_analysis_test2 '
    )
    # Fail here with a clear message instead of a confusing missing-file
    # error from read_csv when the CLI itself did not succeed.
    exit_code = subprocess.call(shlex.split(cmd_str))
    assert exit_code == 0, f'mbarq analyze exited with status {exit_code}'
    actual_results = pd.read_csv(tmpdir.join("cli_analysis_test2_rra_results.csv")).to_dict()
    sample_results = {
        'Name': {1283: 'sciX', 1955: 'SL1344_4468', 2115: 'xylA', 1704: 'yieM', 3475: 'basR', 920: 'tatA', 2017: 'dcoA', 945: 'lrhA', 2361: 'SL1344_2691', 213: 'SL1344_1264', 1356: 'SL1344_1477', 1931: 'SL1344_1567', 1742: 'STM3026', 2251: 'SL1344_2738', 3285: 'ampH', 2865: 'prgH', 21: 'rfbX', 993: 'pspF', 3558: 'SL1344_0019', 99: 'ilvI'},
        'number_of_barcodes': {1283: 1, 1955: 5, 2115: 1, 1704: 1, 3475: 1, 920: 2, 2017: 1, 945: 1, 2361: 1, 213: 1, 1356: 1, 1931: 4, 1742: 1, 2251: 2, 3285: 1, 2865: 1, 21: 1, 993: 1, 3558: 3, 99: 2},
        'LFC': {1283: 0.35757, 1955: -0.57837, 2115: -0.16625, 1704: 0.72942, 3475: 0.05004, 920: -6.9198, 2017: -0.38311, 945: -2.4561, 2361: 0.14491, 213: 0.078651, 1356: 0.40869, 1931: 0.38251, 1742: 0.7928, 2251: 0.15916, 3285: -0.38851, 2865: -4.9191, 21: -2.7876, 993: -0.13799, 3558: -0.41019, 99: -0.118},
        'neg_selection_fdr': {1283: 1.0, 1955: 0.924097, 2115: 0.924097, 1704: 1.0, 3475: 0.593285, 920: 6.1e-05, 2017: 0.924097, 945: 6.1e-05, 2361: 0.924097, 213: 1.0, 1356: 1.0, 1931: 0.924097, 1742: 1.0, 2251: 0.924097, 3285: 0.257293, 2865: 1.7e-05, 21: 4.8e-05, 993: 1.0, 3558: 0.521566, 99: 4.8e-05},
        'pos_selection_fdr': {1283: 6e-06, 1955: 1.0, 2115: 0.888145, 1704: 6e-06, 3475: 1.0, 920: 1.0, 2017: 0.888145, 945: 1.0, 2361: 0.888145, 213: 0.43025, 1356: 6e-06, 1931: 0.888145, 1742: 6e-06, 2251: 3.6e-05, 3285: 1.0, 2865: 1.0, 21: 1.0, 993: 0.550381, 3558: 1.0, 99: 0.646139},
        'contrast': {1283: 'd2', 1955: 'd3', 2115: 'd3', 1704: 'd2', 3475: 'd4', 920: 'd2', 2017: 'd3', 945: 'd2', 2361: 'd3', 213: 'd1', 1356: 'd2', 1931: 'd3', 1742: 'd2', 2251: 'd3', 3285: 'd4', 2865: 'd4', 21: 'd1', 993: 'd2', 3558: 'd4', 99: 'd1'},
    }
    for column, expected in sample_results.items():
        actual = actual_results[column]
        # Collect the offending cells so a failure names them.
        mismatches = {
            row: (want, actual[row])
            for row, want in expected.items()
            if want != actual[row]
        }
        assert not mismatches, f'{column}: (expected, actual) differ at rows {mismatches}'


def test_cli_analysis_no_control(analysis_test_data, tmpdir):
    """Run ``mbarq analyze`` without a controls file or batch column and
    spot-check a sample of rows in the produced RRA results file.

    ``sample_results`` maps column name -> {row index -> expected value};
    every listed cell must equal the value read back from the CSV.
    """
    _, _, _, _, count_file, sample_file, _, _ = analysis_test_data
    treat_col, bline = 'day', 'd0'
    cmd_str = (
        f'mbarq analyze -i {count_file} -s {sample_file} '
        f'--treatment_column {treat_col} '
        f' --baseline {bline} '
        f' -o {tmpdir} -g Name -n cli_analysis_test3 '
    )
    # Fail here with a clear message instead of a confusing missing-file
    # error from read_csv when the CLI itself did not succeed.
    exit_code = subprocess.call(shlex.split(cmd_str))
    assert exit_code == 0, f'mbarq analyze exited with status {exit_code}'
    actual_results = pd.read_csv(tmpdir.join("cli_analysis_test3_rra_results.csv")).to_dict()
    sample_results = {
        'Name': {107: 'gtgA', 342: 'sseI', 2271: 'aroG', 633: 'yjfM', 1506: 'SL1344_3106', 3406: 'SL1344_0330', 2937: 'SL1344_0832', 1326: 'SL1344_0699', 77: 'sipA', 1209: 'tehB', 2692: 'STnc780', 3074: 'SL1344_3750', 2037: 'adi', 811: 'yibK', 1394: 'yrbD', 1734: 'STnc710', 1431: 'SL1344_0702', 71: 'yjgF', 2450: 'ydjM', 843: 'sopD'},
        'number_of_barcodes': {107: 3, 342: 1, 2271: 1, 633: 2, 1506: 2, 3406: 1, 2937: 3, 1326: 3, 77: 4, 1209: 1, 2692: 2, 3074: 3, 2037: 1, 811: 1, 1394: 1, 1734: 2, 1431: 2, 71: 1, 2450: 1, 843: 8},
        'LFC': {107: -0.026237, 342: -0.040027, 2271: -0.012038, 633: 0.008158, 1506: 0.048663, 3406: -3.7883, 2937: -5.5681, 1326: -0.081906, 77: -0.084469, 1209: -0.25546, 2692: 0.25638, 3074: -3.9149, 2037: -0.26044, 811: 0.27488, 1394: -0.032308, 1734: 0.15721, 1431: 0.035906, 71: -0.97829, 2450: 0.15414, 843: 0.021523},
        'neg_selection_fdr': {107: 0.994505, 342: 0.999867, 2271: 0.999995, 633: 0.999867, 1506: 0.999995, 3406: 0.998009, 2937: 0.998009, 1326: 0.999995, 77: 0.915541, 1209: 0.950881, 2692: 0.999995, 3074: 0.998009, 2037: 0.843819, 811: 0.999867, 1394: 0.99656, 1734: 0.999995, 1431: 0.999995, 71: 0.49404, 2450: 0.999995, 843: 0.999867},
        'pos_selection_fdr': {107: 0.999995, 342: 0.999995, 2271: 0.996152, 633: 0.999995, 1506: 0.999995, 3406: 0.970383, 2937: 0.970383, 1326: 0.999995, 77: 0.999995, 1209: 0.999995, 2692: 0.973244, 3074: 0.987142, 2037: 0.999995, 811: 0.999995, 1394: 0.999995, 1734: 0.999995, 1431: 0.999995, 71: 0.999995, 2450: 0.973244, 843: 0.999995},
        'contrast': {107: 'd1', 342: 'd1', 2271: 'd3', 633: 'd1', 1506: 'd2', 3406: 'd4', 2937: 'd4', 1326: 'd2', 77: 'd1', 1209: 'd2', 2692: 'd3', 3074: 'd4', 2037: 'd3', 811: 'd1', 1394: 'd2', 1734: 'd2', 1431: 'd2', 71: 'd1', 2450: 'd3', 843: 'd1'},
    }
    for column, expected in sample_results.items():
        actual = actual_results[column]
        # Collect the offending cells so a failure names them.
        mismatches = {
            row: (want, actual[row])
            for row, want in expected.items()
            if want != actual[row]
        }
        assert not mismatches, f'{column}: (expected, actual) differ at rows {mismatches}'




def test_cli_analysis_log(analysis_test_data, tmpdir):
    """Run ``mbarq analyze`` with controls into ``tmpdir`` and require that
    the command exits successfully.
    """
    _, _, _, controls, count_file, sample_file, _, _ = analysis_test_data
    treat_col, bline = 'day', 'd0'
    cmd_str = (
        f'mbarq analyze -i {count_file} -s {sample_file} '
        f'-c {controls} --treatment_column {treat_col} '
        f' --baseline {bline} '
        f' -o {tmpdir} -g Name -n cli_analysis_test1 '
    )
    # Without this check the test could never fail: the exit status was
    # discarded and nothing else was asserted.
    exit_code = subprocess.call(shlex.split(cmd_str))
    assert exit_code == 0, f'mbarq analyze exited with status {exit_code}'
Loading

0 comments on commit 87d9827

Please sign in to comment.