Skip to content

Commit

Permalink
More updates, based on qp-klp changes (#113)
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-cowart authored Dec 8, 2023
1 parent b187513 commit 4fd447b
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 18 deletions.
1 change: 0 additions & 1 deletion sequence_processing_pipeline/FastQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self, run_dir, output_path, raw_fastq_files_path,
self.fastqc_path = fastqc_path
self.queue_name = queue_name
self.node_count = node_count
self.nprocs = nprocs
self.wall_time_limit = wall_time_limit
self.jmem = jmem
self.qiita_job_id = qiita_job_id
Expand Down
14 changes: 11 additions & 3 deletions sequence_processing_pipeline/NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class NuQCJob(Job):
def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
minimap_database_paths, queue_name, node_count,
wall_time_limit, jmem, fastp_path, minimap2_path,
samtools_path, modules_to_load, qiita_job_id, pool_size,
samtools_path, modules_to_load, qiita_job_id,
max_array_length, known_adapters_path, bucket_size=8,
length_limit=100, cores_per_task=4):
"""
Expand All @@ -41,7 +41,6 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
:param samtools_path: The path to the samtools executable
:param modules_to_load: A list of Linux module names to load
:param qiita_job_id: identify Torque jobs using qiita_job_id
:param pool_size: The number of jobs to process concurrently.
:param known_adapters_path: The path to an .fna file of known adapters.
:param bucket_size: the size in GB of each bucket to process
:param length_limit: reads shorter than this will be discarded.
Expand All @@ -68,7 +67,6 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
self.minimap2_path = minimap2_path
self.samtools_path = samtools_path
self.qiita_job_id = qiita_job_id
self.pool_size = pool_size
self.suffix = 'fastq.gz'

# for projects that use sequence_processing_pipeline as a dependency,
Expand All @@ -82,7 +80,17 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
self.known_adapters_path = known_adapters_path
self.max_file_list_size_in_gb = bucket_size
self.length_limit = length_limit

# NuQCJob() impl uses the -c (--cpus-per-task) switch instead of
# -n (--ntasks). --cpus-per-task requests the number of cpus
# per process. This is to support multithreaded jobs that require more
# than one cpu per task. All cores will be allocated on a single node.
#
# This is different from using -n + -N (number of nodes to request)
# because with that pair of switches the requested tasks may be spread
# across several nodes, allowing more cores than one node provides.
self.cores_per_task = cores_per_task

self.temp_dir = join(self.output_path, 'tmp')
makedirs(self.temp_dir, exist_ok=True)

Expand Down
11 changes: 7 additions & 4 deletions sequence_processing_pipeline/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,20 @@
},
"nu-qc": {
"nodes": 1,
"nprocs": 8,
"cpus_per_task": 8,
"queue": "qiita",
"wallclock_time_in_minutes": 240,
"minimap2_databases": "/panfs/cguccion/23_06_25_Pangenome_Assembley/downloaded_fastqs/fastq_files/pangenome_individual_mmi",
"modules_to_load": ["fastp_0.20.1", "samtools_1.12", "minimap2_2.18"],
"fastp_executable_path": "fastp",
"minimap2_executable_path": "minimap2",
"samtools_executable_path": "samtools",
"job_total_memory_limit": "20gb",
"job_pool_size": 30,
"job_max_array_length": 1000
"job_total_memory_limit": "20",
"job_max_array_length": 1000,
"known_adapters_path": "fastp_known_adapters_formatted.fna",
"bucket_size": 8,
"length_limit": 100,
"cores_per_task": 4
},
"seqpro": {
"seqpro_path": "seqpro",
Expand Down
20 changes: 10 additions & 10 deletions sequence_processing_pipeline/tests/test_NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def test_nuqcjob_creation(self):
NuQCJob(self.fastq_root_path, self.output_path,
'not/path/to/sample/sheet', self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id, 30,
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
1000, '')

self.assertEqual(str(e.exception), "file 'not/path/to/sample/sheet' "
Expand All @@ -567,7 +567,7 @@ def test_nuqcjob_creation(self):
self.tmp_file_path, self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [],
self.qiita_job_id, 30, 1000, '')
self.qiita_job_id, 1000, '')

self.assertFalse(nuqcjob is None)

Expand All @@ -583,7 +583,7 @@ def test_nuqcjob_creation(self):
self.tmp_file_path, self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

self.assertEqual(str(e.exception), ("Assay value 'NotMetagenomic' is "
"not recognized."))
Expand All @@ -592,7 +592,7 @@ def test_nuqcjob_creation(self):
NuQCJob(self.fastq_root_path, self.output_path,
self.bad_sheet_bools_path, self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id, 30,
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
1000, '')

self.assertEqual(str(e.exception),
Expand All @@ -603,7 +603,7 @@ def test_assay_value(self):
NuQCJob(self.fastq_root_path, self.output_path,
self.bad_sample_sheet_path, self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id, 30,
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
1000, '')

self.assertEqual(str(e.exception), "Assay value 'Metagenomics' is not"
Expand All @@ -614,7 +614,7 @@ def test_audit(self):
self.good_sample_sheet_path, self.mmi_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

obs = job.audit(self.sample_ids)

Expand Down Expand Up @@ -1044,7 +1044,7 @@ def test_completed_file_generation(self):
self.good_sample_sheet_path, double_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

my_path = ('sequence_processing_pipeline/tests/data/output_dir/'
'NuQCJob')
Expand All @@ -1067,7 +1067,7 @@ def test_completed_file_generation_some_failures(self):
self.good_sample_sheet_path, double_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

# test _confirm_job_completed() fails when a .completed file isn't
# manually created.
Expand All @@ -1079,7 +1079,7 @@ def test_generate_job_script(self):
self.good_sample_sheet_path, double_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

job_script_path = job._generate_job_script()

Expand Down Expand Up @@ -1216,7 +1216,7 @@ def test_regular_expressions(self):
self.good_sample_sheet_path, double_db_paths,
'queue_name', 1, 1440, '8gb',
'fastp', 'minimap2', 'samtools', [], self.qiita_job_id,
30, 1000, '')
1000, '')

# a sample of known valid fastq file-names plus a few edge-cases.
good_names = ['11407-AAGTAGGAAGGA_S3249_L002_R1_001.fastq.gz',
Expand Down

0 comments on commit 4fd447b

Please sign in to comment.