From 981e6db76de32973731693c3c15c4ad10fec70ec Mon Sep 17 00:00:00 2001 From: Reynold Tan Date: Wed, 21 Mar 2018 15:26:32 -0600 Subject: [PATCH 1/3] Perform additional validation to new traits. - This requires additional functionality to notify admin about new traits that failed validation. --- include/rawpheno.function.measurements.inc | 114 +++++++++++++++++++++ include/rawpheno.upload.excel.inc | 38 +++++++ include/rawpheno.upload.form.inc | 75 +++++++++++++- include/rawpheno.validation.inc | 90 ++-------------- 4 files changed, 232 insertions(+), 85 deletions(-) diff --git a/include/rawpheno.function.measurements.inc b/include/rawpheno.function.measurements.inc index 4b2e2a4..5c94677 100644 --- a/include/rawpheno.function.measurements.inc +++ b/include/rawpheno.function.measurements.inc @@ -1173,3 +1173,117 @@ function rawpheno_function_plot_exists($plot, $project_id) { return $m->fetchField(0); } + + +/** + * Function to test if data matches the unit it measuring. + * + * @param $value + * The data/value of a cell in the spreadsheet file. + * @param $unit + * String, the unit as indicated by the column header the data/value falls under. + * @param $column_header + * String, the name of the column header. + * + * @return + * Boolean TRUE unit matched of FALSE unit-data mismatched. + */ +function rawpheno_function_match_unit($value, $unit, $column_header) { + switch($unit) { + case 'date': + // Date must be YYYY-MM-DD format. + if (preg_match('/^([0-9]{4})-([0-9]{2})-([0-9]{2})/', $value, $matches)) { + // Check that the month/day are in the expected range. + $year = $matches[1]; + $month = $matches[2]; + $day = $matches[3]; + + if ($month >= 1 AND $month <= 12 AND $day >= 1 AND $day <= 31 AND $year > 1900) { + // Then check that it's not in the future. + $today = new DateTime(); + $date = DateTime::createFromFormat('Y-m-d', $value); + + if ($date <= $today) { + return TRUE; + } + } + } + + break; + + case 'count': + case 'days': + // First check that it's a number. + if (is_numeric($value)) { + // Then check it's a positive whole number. + if ((int)$value == $value AND $value > 0) { + return TRUE; + } + } + + // Allow 0 for count. + if ($unit == 'count' AND strval($value) === '0') { + return TRUE; + } + + break; + + case 'cm': + case 'g': + // Measurements - if it is numeric, then it must be greater than 0. + if (is_numeric($value) && $value > 0) { + return TRUE; + } + + // Allow 0 + if (strval($value) === '0') { + return TRUE; + } + + break; + + case 'y/n/?': + // Yes or No - if it is char, length is one, then check if y, n, ? (question mark). + if (in_array(strtolower($value), array('y','n','?', 'yes', 'no'))) { + return TRUE; + } + + break; + + case 'scale': + // Scales are associated with the trait cvterm and each approved code is stored in pheno_scale_member. + // First retrieve the cvterm_id for the trait. + $trait_id = rawpheno_get_trait_id($column_header); + + if ($trait_id) { + // Then check that the value is in the scale members + $present = db_query('SELECT true FROM {pheno_scale_member} WHERE scale_id=:trait_id AND code=:value', + array(':trait_id' => $trait_id, ':value' => $value))->fetchField(); + + if ($present) return TRUE; + + // If not, make sure there are members. + $has_scale = db_query('SELECT count(*) FROM {pheno_scale_member} WHERE scale_id=:trait_id', + array(':trait_id' => $trait_id))->fetchField(); + + if (!$has_scale) return TRUE; + } + else { + // If this is a new trait/column then we don't know how to validate it, so we assume it's correct. + return TRUE; + } + + break; + + case 'text': + return TRUE; + + break; + + default: + return TRUE; + } + + + return FALSE; +} diff --git a/include/rawpheno.upload.excel.inc b/include/rawpheno.upload.excel.inc index aba8c2f..4d14856 100755 --- a/include/rawpheno.upload.excel.inc +++ b/include/rawpheno.upload.excel.inc @@ -25,6 +25,8 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro $plantprop_headers = unserialize($plantprop_headers); $arr_newheaders = unserialize($arr_newheaders); + // print_r($arr_newheaders); + // First we load the file. $file = file_load($fid); @@ -76,6 +78,9 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro // Variations of Not Applicable. $not_applicable = array('na', 'n/a', 'n.a.'); + // New trait that failed validation. + $failed_newtrait = array(); + // Start Transaction. $TRANSACTION = db_transaction(); try { @@ -249,6 +254,28 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro continue; } elseif ($arr_newheaders[$cell_colheader]['flag'] == 1) { + // Test if the validation performed on this field had a success flag. + if (isset($arr_newheaders[$cell_colheader]['test'])) { + $failed_count = 0; + + foreach($arr_newheaders[$cell_colheader]['test'] as $test_i => $test_result) { + if ($test_result == FALSE) { + // This new trait failed validation, skip. + $failed_count++; + } + } + + if ($failed_count > 0) { + // Log this failed new trait. + if (!in_array($cell_colheader, $failed_newtrait)) { + $failed_newtrait[] = $cell_colheader; + } + + continue; + } + } + + // Get the cvterm name for this new header. $alt_name = $arr_newheaders[$cell_colheader]['alt_header']; $name = tripal_get_cvterm(array('cvterm_id' => $alt_name, 'cv_id' => array('name' => 'phenotype_measurement_types'))); @@ -429,6 +456,17 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro print "\nUpdating the materialized view summarizing phenotypic data.\n"; $mview_id = tripal_get_mview_id('rawpheno_rawdata_summary'); if ($mview_id) tripal_populate_mview($mview_id); + + print "Scanning logs for failed traits...\n"; + if (count($failed_newtrait) > 0) { + print "Failed traits sent to admin/support email."; + // @TODO: Email administrator of new traits that failed validation. + // FUNCTIONALITY IN ANOTHER BRANCH PullRequest: #36 - Create a function + // to fetch support email address. + } + else { + print "No failed new traits."; + } } diff --git a/include/rawpheno.upload.form.inc b/include/rawpheno.upload.form.inc index f911831..d6e93b7 100755 --- a/include/rawpheno.upload.form.inc +++ b/include/rawpheno.upload.form.inc @@ -1018,6 +1018,9 @@ function rawpheno_submit_review($form, &$form_state) { if (count($new_header) > 0) { $trait_type = rawpheno_function_trait_types(); + // Count trait that has been checked for saving. + $has_newtraits_checked = 0; + // Read each column header. foreach($new_header as $i => $header) { // For each new header store information provided in the interface. @@ -1030,6 +1033,8 @@ function rawpheno_submit_review($form, &$form_state) { // Determine if the form in review traits has been filled out and checkbox // has been checked by user. If it has been checked then save the trait. if ($form_state['values']['chk_' . $i] === 1 && !empty($form_state['values']['txt_header_' . $i])) { + $has_newtraits_checked++; + // Before save, we need to tell if the header is present in the database and // user just wants to reuse them. Otherwise, add a new header. // Reuse header - set to OPTIONAL. @@ -1060,6 +1065,7 @@ function rawpheno_submit_review($form, &$form_state) { // When saving this data for this header, use the cvterm_id. $arr_newheaders[$header]['alt_header'] = $cvterm_id; + $arr_newheaders[$header]['unit'] = $form_state['values']['txt_unit_' . $i]; continue; } @@ -1073,6 +1079,10 @@ function rawpheno_submit_review($form, &$form_state) { $name = preg_replace('/\s+/', ' ', $name); $unit = trim($form_state['values']['txt_unit_' . $i]); + + // Save the unit entered for this trait. + $arr_newheaders[$header]['unit'] = $unit; + $method = trim($form_state['values']['txtarea_describe_' . $i]); $def = trim($form_state['values']['txt_def_' . $i]); @@ -1190,11 +1200,72 @@ function rawpheno_submit_review($form, &$form_state) { } } - // 2. The entire spreadsheet. + // 2. The entire spreadsheet and validate new traits.. // Get the variable that holds the path to the spreadsheet file in the server. $file = file_load($form_state['multistep_values']['fid']); $xls_file = drupal_realpath($file->uri); + // Before saving, double check to ensure that new traits user wants in the system + // must have data that matches the unit is measuring. When a trait fails validation, + // we flag it as such then it will be ignored in the save process. + // Additional email notification to admin (email support) about this trait. + + // Find all traits checked out by user. + + // Validate all checked traits. + if ($has_newtraits_checked > 0) { + // Get headers and cell values in rows and columns used for validation. + // Number of rows to check. + $rows_to_check = 20; + + // Add the libraries needed to parse excel files. + rawpheno_add_parsing_libraries(); + $xls_obj = rawpheno_open_file($file); + + // Change to the correct spreadsheet. + rawpheno_change_sheet($xls_obj, 'measurements'); + + $n_i = 0; + + foreach ($xls_obj as $rows) { + // Headers. + if ($n_i == 0) { + foreach($rows as $row_index => $row_header) { + $h = trim(str_replace(array("\n", "\r", " "), ' ', $row_header)); + $h = preg_replace('/\s+/', ' ', $h); + + if (in_array($h, array_keys($arr_newheaders)) && $arr_newheaders[$h]['flag'] == 1) { + // Save the index number. + $arr_newheaders[$h]['file_index'] = $row_index; + } + } + + $n_i++; + continue; + } + + // Cell/Data. + foreach($rows as $cell_index => $cell_data) { + foreach($arr_newheaders as $nt_header => $nt_prop) { + if ($nt_prop['file_index'] == $cell_index && $nt_prop['flag'] == 1) { + $cell_data = trim($cell_data); + + $arr_newheaders[$nt_header]['test'][] + = rawpheno_function_match_unit($cell_data, $nt_prop['unit'], $nt_header); + } + } + } + + // Stop at # of rows. + if ($n_i == $rows_to_check) { + break; + } + + $n_i++; + } + } + + // Array of required traits excluding Name. $plantprop_headers = rawpheno_project_plantproperty_traits($project_id); @@ -1210,7 +1281,7 @@ function rawpheno_submit_review($form, &$form_state) { $project_id, serialize($arr_newheaders), $form_state['multistep_values']['fid'], - serialize($plantprop_headers) + serialize($plantprop_headers), ), $user->uid ); diff --git a/include/rawpheno.validation.inc b/include/rawpheno.validation.inc index 9d1ac36..e1d0fa6 100644 --- a/include/rawpheno.validation.inc +++ b/include/rawpheno.validation.inc @@ -508,92 +508,16 @@ function validator_units_match_type_validate_cell($value, $context, &$storage) { } // Then check if it matches based on the unit. - switch($unit) { - case 'date': - // Date must be YYYY-MM-DD format. - if (preg_match('/^([0-9]{4})-([0-9]{2})-([0-9]{2})/', $value, $matches)) { - - // Check that the month/day are in the expected range. - $year = $matches[1]; - $month = $matches[2]; - $day = $matches[3]; - if ($month >= 1 AND $month <= 12 AND $day >= 1 AND $day <= 31 AND $year > 1900) { - - // Then check that it's not in the future. - $today = new DateTime(); - $date = DateTime::createFromFormat('Y-m-d', $value); - if ($date <= $today) { - return TRUE; - } - } - } - break; - - case 'count': - case 'days': - // First check that it's a number. - if (is_numeric($value)) { - // Then check it's a positive whole number. - if ((int)$value == $value AND $value > 0) { - return TRUE; - } - } - // Allow 0 for count. - if ($unit == 'count' AND strval($value) === '0') { - return TRUE; - } - break; - - case 'cm': - case 'g': - // Measurements - if it is numeric, then it must be greater than 0. - if (is_numeric($value) && $value > 0) { - return TRUE; - } - // Allow 0 - if (strval($value) === '0') { - return TRUE; - } - break; - - case 'y/n/?': - // Yes or No - if it is char, length is one, then check if y, n, ? (question mark). - if (in_array(strtolower($value), array('y','n','?', 'yes', 'no'))) { - return TRUE; - } - break; - - case 'scale': - // Scales are associated with the trait cvterm and each approved code is stored - // in pheno_scale_member. - // First retrieve the cvterm_id for the trait. - $trait = $context['header'][ $context['column index'] ]['original']; - $trait_id = rawpheno_get_trait_id($trait); - if ($trait_id) { - // Then check that the value is in the scale members - $present = db_query('SELECT true FROM {pheno_scale_member} WHERE scale_id=:trait_id AND code=:value', - array(':trait_id' => $trait_id, ':value' => $value))->fetchField(); - if ($present) return TRUE; - - // If not, make sure there are members. - $has_scale = db_query('SELECT count(*) FROM {pheno_scale_member} WHERE scale_id=:trait_id', - array(':trait_id' => $trait_id))->fetchField(); - if (!$has_scale) return TRUE; - } - // If this is a new trait/column then we don't know how to validate it, so we assume it's correct. - else { - return TRUE; - } - break; - - case 'text': - return TRUE; - break; + $column_header = $context['header'][ $context['column index'] ]['original']; - default: - return TRUE; + // Match unit to data. + $match_unit = rawpheno_function_match_unit($value, $unit, $column_header); + if ($match_unit) { + // If TRUE, return the value and match next data. + return $match_unit; } + // Something went wrong, data and unit do not match. return array('value' => $value, 'unit' => $unit); } From 13bddfb41e9f998cd6e8bf9dbbb729b3d0df10ec Mon Sep 17 00:00:00 2001 From: Reynold Tan Date: Thu, 22 Mar 2018 13:30:35 -0600 Subject: [PATCH 2/3] Match data and unit. - Updated code to account for tokens in the unit which may cause a unit to be validated as text. --- include/rawpheno.upload.excel.inc | 4 ++-- include/rawpheno.upload.form.inc | 29 ++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/rawpheno.upload.excel.inc b/include/rawpheno.upload.excel.inc index 4d14856..1fa79df 100755 --- a/include/rawpheno.upload.excel.inc +++ b/include/rawpheno.upload.excel.inc @@ -459,13 +459,13 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro print "Scanning logs for failed traits...\n"; if (count($failed_newtrait) > 0) { - print "Failed traits sent to admin/support email."; + print "Failed traits sent to admin/support email. \n"; // @TODO: Email administrator of new traits that failed validation. // FUNCTIONALITY IN ANOTHER BRANCH PullRequest: #36 - Create a function // to fetch support email address. } else { - print "No failed new traits."; + print "No failed new traits. \n"; } } diff --git a/include/rawpheno.upload.form.inc b/include/rawpheno.upload.form.inc index d6e93b7..328bb9d 100755 --- a/include/rawpheno.upload.form.inc +++ b/include/rawpheno.upload.form.inc @@ -1018,6 +1018,13 @@ function rawpheno_submit_review($form, &$form_state) { if (count($new_header) > 0) { $trait_type = rawpheno_function_trait_types(); + // In some cases, the unit contains 1st; 2nd; followed by the actual unit. + // Attempt to extract known unit. + // Known units: + // date, count, days, cm, scale, g, text, y/n/?; + $def_unit = rawpheno_function_default_unit('def'); + $unit_set = array_keys($def_unit); + // Count trait that has been checked for saving. $has_newtraits_checked = 0; @@ -1035,6 +1042,23 @@ function rawpheno_submit_review($form, &$form_state) { if ($form_state['values']['chk_' . $i] === 1 && !empty($form_state['values']['txt_header_' . $i])) { $has_newtraits_checked++; + // Examine the unit and find any default unit. + // This will be use to test this new trait if data match the unit. + $tmp_unit = $form_state['values']['txt_unit_' . $i]; + + foreach($unit_set as $def_u) { + if (strpos($tmp_unit, $def_u) !== FALSE) { + $tmp_unit = $def_u; + + break; + } + } + + // If none detected, send the entire value/unit to validator + // which will evaluate to text. + + $arr_newheaders[$header]['unit'] = $tmp_unit; + // Before save, we need to tell if the header is present in the database and // user just wants to reuse them. Otherwise, add a new header. // Reuse header - set to OPTIONAL. @@ -1065,7 +1089,6 @@ function rawpheno_submit_review($form, &$form_state) { // When saving this data for this header, use the cvterm_id. $arr_newheaders[$header]['alt_header'] = $cvterm_id; - $arr_newheaders[$header]['unit'] = $form_state['values']['txt_unit_' . $i]; continue; } @@ -1079,10 +1102,6 @@ function rawpheno_submit_review($form, &$form_state) { $name = preg_replace('/\s+/', ' ', $name); $unit = trim($form_state['values']['txt_unit_' . $i]); - - // Save the unit entered for this trait. - $arr_newheaders[$header]['unit'] = $unit; - $method = trim($form_state['values']['txtarea_describe_' . $i]); $def = trim($form_state['values']['txt_def_' . $i]); From 5b5b616361d2dd4a52a2865ed44aba3a78a3eed7 Mon Sep 17 00:00:00 2001 From: Reynold Tan Date: Wed, 25 Apr 2018 14:47:19 -0600 Subject: [PATCH 3/3] Send info about failed new traits. --- include/rawpheno.upload.excel.inc | 63 +++++++++++++++++++++++++++++-- rawpheno.module | 21 +++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/include/rawpheno.upload.excel.inc b/include/rawpheno.upload.excel.inc index 1fa79df..8d9a510 100755 --- a/include/rawpheno.upload.excel.inc +++ b/include/rawpheno.upload.excel.inc @@ -459,10 +459,31 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro print "Scanning logs for failed traits...\n"; if (count($failed_newtrait) > 0) { - print "Failed traits sent to admin/support email. \n"; - // @TODO: Email administrator of new traits that failed validation. - // FUNCTIONALITY IN ANOTHER BRANCH PullRequest: #36 - Create a function - // to fetch support email address. + $support_email = variable_get('rawpheno_support_email'); + + if ($support_email) { + // Send email only when there is an email set. + + // New Traits/Column Headers. Convert failed new traits into a comma separated list. + $trait_list = implode(',', $failed_newtrait); + $report = "\n" . '* Traits: ' . $trait_list; + + // Project Name. + $project_name = rawpheno_function_getproject($project_id); + $report .= "\n" . '* Project: ' . $project_name; + + // Data Collection Spreadsheet File (xlsx). + $report .= "\n" . '* XLSX FILE: ' . $xls_file; + + // Username of Data Collector. + $report .= "\n" . '* Data Collector: ' . $GLOBALS['user']->name; + + // Send information to adminstrator. + rawpheno_notify_supportemail($support_email, $report); + + // Tell admin info sent. + print "Information sent to support email...\n"; + } } else { print "No failed new traits. \n"; @@ -1199,3 +1220,37 @@ function rawpheno_function_cvterm_properties($cvterm_id) { return 'Describe the method used not available'; } } + + +/** + * Function to notify support email/administrator of a new trait that failed + * validation in stage 3 - Save Spreadsheet file. + * + * @param $support_email + * String, support email address. + * @param $content + * An array containing failed new traits. Information such as the Trait name, + * Project, XLSX File and the username of data collector. + */ +function rawpheno_notify_supportemail($support_email, $content) { + global $base_url; + $site_name = variable_get('site_name'); + + // Set who this email is being sent from (ie: from KnowPulse) + $from = variable_get('site_mail', ''); + $to = $support_email; + + // Compile the body of the email to send. + $body[] = 'Dear Administrator, '. "\n"; + $body[] = 'The following new traits failed validation in Raw Phenotypes Module:' . "\n"; + $body[] = $content; + $body[] = "\n" . 'Website: ' . $site_name . ' (' . $base_url . ')'; + + $params = array( + 'body' => $body, + 'subject' => $site_name . ' : RawPhenotypes - New Trait Failed', + ); + + return drupal_mail('rawpheno', 'rawpheno', $to, language_default(), $params, $from, TRUE); +} + diff --git a/rawpheno.module b/rawpheno.module index 700f030..6db07ea 100755 --- a/rawpheno.module +++ b/rawpheno.module @@ -1647,3 +1647,24 @@ function rawpheno_preprocess_block(&$vars) { return $vars; } + +/** + * Implements hook_mail(). + * Used for the the drupal mail system. + */ +function rawpheno_mail($key, &$message, $params) { + + $headers = array( + 'MIME-Version' => '1.0', + 'Content-Type' => 'text/plain; charset=UTF-8;', + 'Content-Transfer-Encoding' => '8Bit', + 'X-Mailer' => 'Drupal' + ); + + foreach ($headers as $key => $value) { + $message['headers'][$key] = $value; + } + + $message['subject'] = $params['subject']; + $message['body'] = $params['body']; +}