Add production change (#146)

* Add production change
* Updated test

The metapool package now stores boolean values in the Bioinformatics section as bool rather than as strings. Hence, the conversion step in mg-scripts and its associated test have been removed.
biocore · Jul 3, 2024 · d733730 · d733730
1 parent 76bbdde
commit d733730
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 38 deletions.
diff --git a/sequence_processing_pipeline/NuQCJob.py b/sequence_processing_pipeline/NuQCJob.py
@@ -383,18 +383,6 @@ def _process_sample_sheet(self):
         # the ordering of the rows will be preserved in the order of the list.
         lst = bioinformatics.to_dict('records')
 
-        # convert true/false and yes/no strings to true boolean values.
-        for record in lst:
-            # the subset of columns that should be either True or False.
-            for key in ['BarcodesAreRC', 'HumanFiltering']:
-                val = record[key].strip()
-                if val == 'True':
-                    record[key] = True
-                elif val == 'False':
-                    record[key] = False
-                else:
-                    raise ValueError(f"'{val}' is not a valid value for {key}")
-
         # human-filtering jobs are scoped by project. Each job requires
         # particular knowledge of the project.
         return {'chemistry': chemistry,

diff --git a/sequence_processing_pipeline/Pipeline.py b/sequence_processing_pipeline/Pipeline.py
@@ -705,13 +705,19 @@ def get_project_info(self, short_names=False):
         results = []
 
         if self.mapping_file is not None:
+            if 'contains_replicates' in self.mapping_file:
+                contains_replicates = True
+            else:
+                contains_replicates = False
+
             sample_project_map = {pn: _df.sample_name.values for pn, _df in
                                   self.mapping_file.groupby('project_name')}
 
             for project in sample_project_map:
                 p_name, q_id = self._parse_project_name(project, short_names)
                 results.append(
-                    {'project_name': p_name, 'qiita_id': q_id})
+                    {'project_name': p_name, 'qiita_id': q_id,
+                     'contains_replicates': contains_replicates})
         else:
             bioinformatics = self.sample_sheet.Bioinformatics
             for res in bioinformatics.to_dict('records'):

diff --git a/sequence_processing_pipeline/tests/test_NuQCJob.py b/sequence_processing_pipeline/tests/test_NuQCJob.py
@@ -966,30 +966,6 @@ def test_nuqcjob_creation(self):
                 self.pmls_path,
             )
 
-        with self.assertRaisesRegex(
-            ValueError, "'FALSE' is not a valid value" " for HumanFiltering"
-        ):
-            NuQCJob(
-                self.fastq_root_path,
-                self.output_path,
-                self.bad_sheet_bools_path,
-                self.mmi_db_paths,
-                "queue_name",
-                1,
-                1440,
-                "8",
-                "fastp",
-                "minimap2",
-                "samtools",
-                [],
-                self.qiita_job_id,
-                1000,
-                "",
-                self.movi_path,
-                self.gres_value,
-                self.pmls_path,
-            )
-
     def test_error_msg_from_logs(self):
         job = NuQCJob(
             self.fastq_root_path,

diff --git a/sequence_processing_pipeline/tests/test_Pipeline.py b/sequence_processing_pipeline/tests/test_Pipeline.py
@@ -2202,7 +2202,8 @@ def test_get_sample_names(self):
     def test_get_project_info(self):
         exp_proj_info = [
             {'project_name': 'ABTX_20230208_ABTX_11052',
-             'qiita_id': '11052'}]
+             'qiita_id': '11052',
+             'contains_replicates': False}]
 
         exp_project_names = ['ABTX_20230208_ABTX_11052']