Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #37 perform additional validation to new traits in stage 3 #39

Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions include/rawpheno.function.measurements.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1173,3 +1173,117 @@ function rawpheno_function_plot_exists($plot, $project_id) {

return $m->fetchField(0);
}


/**
 * Test whether a spreadsheet cell value is valid for the unit of its column.
 *
 * Rules applied per unit:
 *  - date: exactly YYYY-MM-DD, a real calendar date, year after 1900 and not
 *    later than today.
 *  - count, days: positive whole number; 0 is additionally allowed for count.
 *  - cm, g: numeric value greater than 0, or the literal 0.
 *  - y/n/?: one of y, n, ?, yes, no (case-insensitive).
 *  - scale: value must be one of the codes stored in pheno_scale_member for
 *    the trait; traits that are unknown or have no scale members pass.
 *  - text and any unrecognized unit: always valid.
 *
 * @param $value
 *   The data/value of a cell in the spreadsheet file.
 * @param $unit
 *   String, the unit as indicated by the column header the data/value falls under.
 * @param $column_header
 *   String, the name of the column header. Only used by the 'scale' unit to
 *   look up the trait.
 *
 * @return
 *   Boolean TRUE when the value matches the unit, FALSE on a unit-data mismatch.
 */
function rawpheno_function_match_unit($value, $unit, $column_header) {
  switch ($unit) {
    case 'date':
      // Date must be exactly YYYY-MM-DD. The \z anchor rejects trailing
      // characters (e.g. "2020-01-15xyz"), which the date parser below would
      // fail on and which must not be mistaken for a valid date.
      if (preg_match('/^([0-9]{4})-([0-9]{2})-([0-9]{2})\z/', (string) $value, $matches)) {
        $year = (int) $matches[1];
        $month = (int) $matches[2];
        $day = (int) $matches[3];

        // checkdate() verifies the month/day combination is a real calendar
        // date, so values such as 2021-02-31 are rejected instead of silently
        // rolling over into the next month.
        if ($year > 1900 && checkdate($month, $day, $year)) {
          // Then check that it's not in the future. The leading "!" resets the
          // time of day to midnight so the comparison is made on whole days
          // and today's own date is accepted.
          $date = DateTime::createFromFormat('!Y-m-d', $matches[0]);
          $today = new DateTime('today');

          if ($date !== FALSE && $date <= $today) {
            return TRUE;
          }
        }
      }

      break;

    case 'count':
    case 'days':
      // First check that it's a number.
      if (is_numeric($value)) {
        // Then check it's a positive whole number.
        if ((int) $value == $value && $value > 0) {
          return TRUE;
        }
      }

      // Allow 0 for count.
      if ($unit == 'count' && strval($value) === '0') {
        return TRUE;
      }

      break;

    case 'cm':
    case 'g':
      // Measurements - if it is numeric, then it must be greater than 0.
      if (is_numeric($value) && $value > 0) {
        return TRUE;
      }

      // Allow 0.
      if (strval($value) === '0') {
        return TRUE;
      }

      break;

    case 'y/n/?':
      // Yes or No - accept y, n, ? (question mark), yes or no in any letter case.
      if (in_array(strtolower($value), array('y', 'n', '?', 'yes', 'no'))) {
        return TRUE;
      }

      break;

    case 'scale':
      // Scales are associated with the trait cvterm and each approved code is
      // stored in pheno_scale_member.
      // First retrieve the cvterm_id for the trait.
      $trait_id = rawpheno_get_trait_id($column_header);

      if ($trait_id) {
        // Then check that the value is in the scale members.
        $present = db_query('SELECT true FROM {pheno_scale_member} WHERE scale_id=:trait_id AND code=:value',
          array(':trait_id' => $trait_id, ':value' => $value))->fetchField();

        if ($present) {
          return TRUE;
        }

        // If not, make sure there are members. A trait without any scale
        // members cannot be validated, so the value is accepted.
        $has_scale = db_query('SELECT count(*) FROM {pheno_scale_member} WHERE scale_id=:trait_id',
          array(':trait_id' => $trait_id))->fetchField();

        if (!$has_scale) {
          return TRUE;
        }
      }
      else {
        // If this is a new trait/column then we don't know how to validate it,
        // so we assume it's correct.
        return TRUE;
      }

      break;

    case 'text':
      return TRUE;

    default:
      // Unknown units are not validated.
      return TRUE;
  }

  // A recognized unit whose rule was not satisfied.
  return FALSE;
}
93 changes: 93 additions & 0 deletions include/rawpheno.upload.excel.inc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro
$plantprop_headers = unserialize($plantprop_headers);
$arr_newheaders = unserialize($arr_newheaders);

// print_r($arr_newheaders);

// First we load the file.
$file = file_load($fid);

Expand Down Expand Up @@ -76,6 +78,9 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro
// Variations of Not Applicable.
$not_applicable = array('na', 'n/a', 'n.a.');

// New trait that failed validation.
$failed_newtrait = array();

// Start Transaction.
$TRANSACTION = db_transaction();
try {
Expand Down Expand Up @@ -249,6 +254,28 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro
continue;
}
elseif ($arr_newheaders[$cell_colheader]['flag'] == 1) {
// Test if the validation performed on this field had a success flag.
if (isset($arr_newheaders[$cell_colheader]['test'])) {
$failed_count = 0;

foreach($arr_newheaders[$cell_colheader]['test'] as $test_i => $test_result) {
if ($test_result == FALSE) {
// This new trait failed validation, skip.
$failed_count++;
}
}

if ($failed_count > 0) {
// Log this failed new trait.
if (!in_array($cell_colheader, $failed_newtrait)) {
$failed_newtrait[] = $cell_colheader;
}

continue;
}
}


// Get the cvterm name for this new header.
$alt_name = $arr_newheaders[$cell_colheader]['alt_header'];
$name = tripal_get_cvterm(array('cvterm_id' => $alt_name, 'cv_id' => array('name' => 'phenotype_measurement_types')));
Expand Down Expand Up @@ -429,6 +456,38 @@ function rawpheno_load_spreadsheet($project_id, $arr_newheaders, $fid, $plantpro
print "\nUpdating the materialized view summarizing phenotypic data.\n";
$mview_id = tripal_get_mview_id('rawpheno_rawdata_summary');
if ($mview_id) tripal_populate_mview($mview_id);

print "Scanning logs for failed traits...\n";
if (count($failed_newtrait) > 0) {
$support_email = variable_get('rawpheno_support_email');

if ($support_email) {
// Send email only when there is an email set.

// New Traits/Column Headers. Convert failed new traits into a comma separated list.
$trait_list = implode(',', $failed_newtrait);
$report = "\n" . '* Traits: ' . $trait_list;

// Project Name.
$project_name = rawpheno_function_getproject($project_id);
$report .= "\n" . '* Project: ' . $project_name;

// Data Collection Spreadsheet File (xlsx).
$report .= "\n" . '* XLSX FILE: ' . $xls_file;

// Username of Data Collector.
$report .= "\n" . '* Data Collector: ' . $GLOBALS['user']->name;

// Send information to administrator.
rawpheno_notify_supportemail($support_email, $report);

// Tell admin info sent.
print "Information sent to support email...\n";
}
}
else {
print "No failed new traits. \n";
}
}


Expand Down Expand Up @@ -1161,3 +1220,37 @@ function rawpheno_function_cvterm_properties($cvterm_id) {
return 'Describe the method used not available';
}
}


/**
 * Email the support address/administrator about new traits that failed
 * validation in stage 3 - Save Spreadsheet file.
 *
 * @param $support_email
 *   String, the support email address the notification is sent to.
 * @param $content
 *   The report inserted into the message body as-is. It describes the failed
 *   new traits: trait names, project, XLSX file and the username of the data
 *   collector.
 *
 * @return
 *   The value returned by drupal_mail() for this message.
 */
function rawpheno_notify_supportemail($support_email, $content) {
  global $base_url;

  $site_name = variable_get('site_name');

  // The message is sent on behalf of the site (ie: from KnowPulse).
  $sender = variable_get('site_mail', '');

  // Body of the email: greeting, introduction, the report itself and a
  // footer identifying the website.
  $message_lines = array(
    'Dear Administrator, ' . "\n",
    'The following new traits failed validation in Raw Phenotypes Module:' . "\n",
    $content,
    "\n" . 'Website: ' . $site_name . ' (' . $base_url . ')',
  );

  $mail_params = array(
    'body' => $message_lines,
    'subject' => $site_name . ' : RawPhenotypes - New Trait Failed',
  );

  return drupal_mail('rawpheno', 'rawpheno', $support_email, language_default(), $mail_params, $sender, TRUE);
}

94 changes: 92 additions & 2 deletions include/rawpheno.upload.form.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,16 @@ function rawpheno_submit_review($form, &$form_state) {
if (count($new_header) > 0) {
$trait_type = rawpheno_function_trait_types();

// In some cases, the unit contains 1st; 2nd; followed by the actual unit.
// Attempt to extract known unit.
// Known units:
// date, count, days, cm, scale, g, text, y/n/?;
$def_unit = rawpheno_function_default_unit('def');
$unit_set = array_keys($def_unit);

// Count trait that has been checked for saving.
$has_newtraits_checked = 0;

// Read each column header.
foreach($new_header as $i => $header) {
// For each new header store information provided in the interface.
Expand All @@ -1030,6 +1040,25 @@ function rawpheno_submit_review($form, &$form_state) {
// Determine if the form in review traits has been filled out and checkbox
// has been checked by user. If it has been checked then save the trait.
if ($form_state['values']['chk_' . $i] === 1 && !empty($form_state['values']['txt_header_' . $i])) {
$has_newtraits_checked++;

// Examine the unit and find any default unit.
// This will be used to test whether the data of this new trait matches the unit.
$tmp_unit = $form_state['values']['txt_unit_' . $i];

foreach($unit_set as $def_u) {
if (strpos($tmp_unit, $def_u) !== FALSE) {
$tmp_unit = $def_u;

break;
}
}

// If none detected, send the entire value/unit to validator
// which will evaluate to text.

$arr_newheaders[$header]['unit'] = $tmp_unit;

// Before save, we need to tell if the header is present in the database and
// user just wants to reuse them. Otherwise, add a new header.
// Reuse header - set to OPTIONAL.
Expand Down Expand Up @@ -1190,11 +1219,72 @@ function rawpheno_submit_review($form, &$form_state) {
}
}

// 2. The entire spreadsheet.
// 2. The entire spreadsheet and validate new traits.
// Get the variable that holds the path to the spreadsheet file in the server.
$file = file_load($form_state['multistep_values']['fid']);
$xls_file = drupal_realpath($file->uri);

// Before saving, double check that every new trait the user wants in the system
// has data that matches the unit it is measuring. When a trait fails validation,
// we flag it as such so that it is ignored in the save process.
// An email notification about the trait is also sent to the admin (support email).

// Find all traits checked out by user.

// Validate all checked traits.
if ($has_newtraits_checked > 0) {
// Get headers and cell values in rows and columns used for validation.
// Number of rows to check.
$rows_to_check = 20;

// Add the libraries needed to parse excel files.
rawpheno_add_parsing_libraries();
$xls_obj = rawpheno_open_file($file);

// Change to the correct spreadsheet.
rawpheno_change_sheet($xls_obj, 'measurements');

$n_i = 0;

foreach ($xls_obj as $rows) {
// Headers.
if ($n_i == 0) {
foreach($rows as $row_index => $row_header) {
$h = trim(str_replace(array("\n", "\r", " "), ' ', $row_header));
$h = preg_replace('/\s+/', ' ', $h);

if (in_array($h, array_keys($arr_newheaders)) && $arr_newheaders[$h]['flag'] == 1) {
// Save the index number.
$arr_newheaders[$h]['file_index'] = $row_index;
}
}

$n_i++;
continue;
}

// Cell/Data.
foreach($rows as $cell_index => $cell_data) {
foreach($arr_newheaders as $nt_header => $nt_prop) {
if ($nt_prop['file_index'] == $cell_index && $nt_prop['flag'] == 1) {
$cell_data = trim($cell_data);

$arr_newheaders[$nt_header]['test'][]
= rawpheno_function_match_unit($cell_data, $nt_prop['unit'], $nt_header);
}
}
}

// Stop at # of rows.
if ($n_i == $rows_to_check) {
break;
}

$n_i++;
}
}


// Array of required traits excluding Name.
$plantprop_headers = rawpheno_project_plantproperty_traits($project_id);

Expand All @@ -1210,7 +1300,7 @@ function rawpheno_submit_review($form, &$form_state) {
$project_id,
serialize($arr_newheaders),
$form_state['multistep_values']['fid'],
serialize($plantprop_headers)
serialize($plantprop_headers),
),
$user->uid
);
Expand Down
Loading