Merge pull request #134 from mgcam/column_spec

comment extended, a new column for whole genome yield
wtsi-npg · Nov 18, 2019 · 63636e5 · 63636e5
2 parents 04f8642 + ba1746b
commit 63636e5
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 13 deletions.
diff --git a/Changes b/Changes
@@ -1,6 +1,14 @@
 LIST OF CHANGES
 ---------------
 
+release 6.5.0
+ - more detailed comments for some columns.
+ - 'annotation' column reduced to 15 chars since it's not free text
+ - added columns for whole genome yield and for the end date of
+   phase 2 analysis
+ - fixed spelling in one of the columns, ie changed column name
+   from archive_conformation_date to archive_confirmation_date
+
 release 6.4.0
  - iseq_external_product_component table redesign to allow for
    rows on iseq_product_metrics table to be deleted and recreated

diff --git a/lib/WTSI/DNAP/Warehouse/Schema/Result/IseqExternalProductMetric.pm b/lib/WTSI/DNAP/Warehouse/Schema/Result/IseqExternalProductMetric.pm
@@ -166,23 +166,23 @@ Archive ID for data product
   is_nullable: 1
   size: 15
 
-One of 'PASS', 'HOLD', 'INSUFFICIENT', 'FAIL'
+Overall status of the product, one of 'PASS', 'HOLD', 'INSUFFICIENT', 'FAIL'
 
 =head2 qc_overall_assessment
 
   data_type: 'char'
   is_nullable: 1
   size: 4
 
-Final data product criteria evaluation outcome as 'PASS' or 'FAIL'
+State of the product after phase 3 of processing, one of 'PASS' or 'FAIL'
 
 =head2 qc_status
 
   data_type: 'char'
   is_nullable: 1
   size: 15
 
-One of 'PASS', 'HOLD', 'INSUFFICIENT', 'FAIL'
+State of the product after phase 2 of processing, one of 'PASS', 'HOLD', 'INSUFFICIENT', 'FAIL'
 
 =head2 sequencing_start_date
 
@@ -222,6 +222,14 @@ Processing start date
   datetime_undef_if_invalid: 1
   is_nullable: 1
 
+=head2 phase2_end_date
+
+  data_type: 'datetime'
+  datetime_undef_if_invalid: 1
+  is_nullable: 1
+
+Date the phase 2 analysis finished for this product
+
 =head2 analysis_end_date
 
   data_type: 'date'
@@ -236,13 +244,13 @@ Processing start date
 
 Date made available or pushed to archive service
 
-=head2 archive_conformation_date
+=head2 archive_confirmation_date
 
   data_type: 'date'
   datetime_undef_if_invalid: 1
   is_nullable: 1
 
-Date of confirmation of integrity of data products by archive service
+Date of confirmation of integrity of data product by archive service
 
 =head2 md5
 
@@ -296,9 +304,9 @@ Comma separated sorted list of flowcell IDs obtained from the CRAM file header(s
 
   data_type: 'varchar'
   is_nullable: 1
-  size: 256
+  size: 15
 
-annotation regarding data provenance, i.e. is sequence data from first pass, re-run, top-up etc.
+Annotation regarding data provenance, i.e. is sequence data from first pass, re-run, top-up, etc.
 
 =head2 min_read_length
 
@@ -364,33 +372,45 @@ Fraction of marker pairs with two read pairs evidencing parity and non-parity, m
 
 'PASS' or 'FAIL' based on verify_bam_id_score_assessment and double_error_fraction < 0.2%
 
+=head2 yield_whole_genome
+
+  data_type: 'float'
+  extra: {unsigned => 1}
+  is_nullable: 1
+
+Sequence data quantity (Gb) excluding duplicate reads, adaptors, overlapping bases from reads on the same fragment, soft-clipped bases
+
 =head2 yield
 
   data_type: 'float'
   extra: {unsigned => 1}
   is_nullable: 1
 
-sequence data quantity (Gb), autosome
+Sequence data quantity (Gb) excluding duplicate reads, adaptors, overlapping bases from reads on the same fragment, soft-clipped bases, non-N autosome only
 
 =head2 yield_q20
 
   data_type: 'bigint'
   extra: {unsigned => 1}
   is_nullable: 1
 
+Yield in bases at or above Q20 filtered in the same way as the yield column values
+
 =head2 yield_q30
 
   data_type: 'bigint'
   extra: {unsigned => 1}
   is_nullable: 1
 
+Yield in bases at or above Q30 filtered in the same way as the yield column values
+
 =head2 num_reads
 
   data_type: 'bigint'
   extra: {unsigned => 1}
   is_nullable: 1
 
-Number of reads
+Number of reads filtered in the same way as the yield column values
 
 =head2 gc_fraction_forward_read
 
@@ -442,6 +462,8 @@ Minimum of TOTAL_QSCORE values in BaitBias report from CollectSequencingArtifact
   extra: {unsigned => 1}
   is_nullable: 1
 
+Fraction of properly paired mapped reads filtered in the same way as the yield column values
+
 =head2 target_proper_pair_mapped_reads_assessment
 
   data_type: 'char'
@@ -650,11 +672,17 @@ __PACKAGE__->add_columns(
   { data_type => 'date', datetime_undef_if_invalid => 1, is_nullable => 1 },
   'analysis_start_date',
   { data_type => 'date', datetime_undef_if_invalid => 1, is_nullable => 1 },
+  'phase2_end_date',
+  {
+    data_type => 'datetime',
+    datetime_undef_if_invalid => 1,
+    is_nullable => 1,
+  },
   'analysis_end_date',
   { data_type => 'date', datetime_undef_if_invalid => 1, is_nullable => 1 },
   'archival_date',
   { data_type => 'date', datetime_undef_if_invalid => 1, is_nullable => 1 },
-  'archive_conformation_date',
+  'archive_confirmation_date',
   { data_type => 'date', datetime_undef_if_invalid => 1, is_nullable => 1 },
   'md5',
   { data_type => 'char', is_nullable => 1, size => 32 },
@@ -669,7 +697,7 @@ __PACKAGE__->add_columns(
   'flowcell_id',
   { data_type => 'varchar', is_nullable => 1, size => 256 },
   'annotation',
-  { data_type => 'varchar', is_nullable => 1, size => 256 },
+  { data_type => 'varchar', is_nullable => 1, size => 15 },
   'min_read_length',
   { data_type => 'tinyint', extra => { unsigned => 1 }, is_nullable => 1 },
   'target_autosome_coverage_threshold',
@@ -691,6 +719,8 @@ __PACKAGE__->add_columns(
   { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 },
   'contamination_assessment',
   { data_type => 'char', is_nullable => 1, size => 4 },
+  'yield_whole_genome',
+  { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 },
   'yield',
   { data_type => 'float', extra => { unsigned => 1 }, is_nullable => 1 },
   'yield_q20',
@@ -795,8 +825,8 @@ __PACKAGE__->has_many(
 );
 
 
-# Created by DBIx::Class::Schema::Loader v0.07049 @ 2019-10-31 15:09:17
-# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:bZN6W3/2xfdBmBWBAS2Ybw
+# Created by DBIx::Class::Schema::Loader v0.07049 @ 2019-11-18 12:17:04
+# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:nMWf/wxpcml/vgyJp3FC0A
 
 use Readonly;
 use Try::Tiny;

diff --git a/scripts/update_schema_6.5.0.sql b/scripts/update_schema_6.5.0.sql
@@ -0,0 +1,34 @@
+-- Release 6.5.0, iseq_external_product_metrics: more detailed column comments;
+-- annotation column reduced to 15 chars since it's not free text;
+-- added columns for whole genome yield and phase 2 analysis end date;
+-- fixed spelling: archive_conformation_date -> archive_confirmation_date.
+-- One ALTER TABLE statement; its clauses span lines up to the final semicolon.
+ALTER TABLE `iseq_external_product_metrics`
+  MODIFY `qc_status` char(15) DEFAULT NULL
+  COMMENT 'State of the product after phase 2 of processing, one of "PASS", "HOLD", "INSUFFICIENT", "FAIL"',
+  MODIFY `qc_overall_assessment` char(4) DEFAULT NULL
+  COMMENT 'State of the product after phase 3 of processing, one of "PASS" or "FAIL"',
+  MODIFY `processing_status` char(15) DEFAULT NULL
+  COMMENT 'Overall status of the product, one of "PASS", "HOLD", "INSUFFICIENT", "FAIL"',
+  MODIFY `yield` float unsigned DEFAULT NULL
+  COMMENT 'Sequence data quantity (Gb) excluding duplicate reads, adaptors, overlapping bases from reads on the same fragment, soft-clipped bases, non-N autosome only',
+  MODIFY `yield_q20` bigint(20) unsigned DEFAULT NULL
+  COMMENT 'Yield in bases at or above Q20 filtered in the same way as the yield column values',
+  MODIFY `yield_q30` bigint(20) unsigned DEFAULT NULL
+  COMMENT 'Yield in bases at or above Q30 filtered in the same way as the yield column values',
+  MODIFY `num_reads` bigint(20) unsigned DEFAULT NULL
+  COMMENT 'Number of reads filtered in the same way as the yield column values',
+  MODIFY `target_proper_pair_mapped_reads_fraction` float unsigned DEFAULT NULL
+  COMMENT 'Fraction of properly paired mapped reads filtered in the same way as the yield column values',
+  MODIFY `annotation` varchar(15) DEFAULT NULL
+  COMMENT 'Annotation regarding data provenance, i.e. is sequence data from first pass, re-run, top-up, etc.',
+  CHANGE COLUMN `archive_conformation_date` `archive_confirmation_date`
+  date DEFAULT NULL
+  COMMENT 'Date of confirmation of integrity of data product by archive service'
+  AFTER `archival_date`,
+  ADD COLUMN `yield_whole_genome` float unsigned DEFAULT NULL
+  COMMENT 'Sequence data quantity (Gb) excluding duplicate reads, adaptors, overlapping bases from reads on the same fragment, soft-clipped bases'
+  AFTER `contamination_assessment`,
+  ADD COLUMN `phase2_end_date` datetime DEFAULT NULL
+  COMMENT 'Date the phase 2 analysis finished for this product'
+  AFTER `analysis_start_date`;