Skip to content

Commit

Permalink
Merge pull request openemr#7729 from stephenwaite/iss7477-rel-702
Browse files Browse the repository at this point in the history
ins subscriber name validation fixes see openemr#7477 (openemr#7478) for rel-702
  • Loading branch information
stephenwaite authored Sep 18, 2024
2 parents 0878c9b + 896e051 commit 31886ae
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 2 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@
},
"config": {
"platform": {
"php": "8.1"
"php": "8.1",
"ext-intl": "8.1"
},
"allow-plugins": {
"openemr/oe-module-installer-plugin": true,
Expand Down
23 changes: 23 additions & 0 deletions library/sanitize.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,26 @@ function dateEmptySql($sqlColumn, $time = false, $rev = false)

return $stat;
}

/**
 * Compares two multibyte (UTF-8) identifier strings for semantic equality, ignoring case.
 *
 * Both values are normalized to Unicode NFKC before being compared, so composed and decomposed forms
 * of the same text (e.g. "é" vs "e" + combining acute accent) are considered equal. NFKC also treats
 * several compatibility-equivalent characters as identical, so this should only be used for
 * identifiers (proper nouns such as names), not for arbitrary text.
 *
 * The case-insensitive check accepts a match on either the lower-cased or the upper-cased form, which
 * lets expansions such as "ß" vs "SS" compare equal.
 *
 * @see https://learn.microsoft.com/en-us/windows/win32/intl/using-unicode-normalization-to-represent-strings
 *      for a good explanation of string normalization
 * @see https://www.unicode.org/faq/normalization.html#2 for explanation on why we use NFKC
 * @see https://stackoverflow.com/a/38855868
 * @param string|null $string1 First value to compare; null is treated as an empty string
 * @param string|null $string2 Second value to compare; null is treated as an empty string
 * @return bool True when the two values are identical after NFKC normalization and case mapping;
 *              false otherwise, including when either value cannot be normalized (e.g. invalid UTF-8)
 */
function mb_is_string_equal_ci($string1, $string2): bool
{
    // Work on strings only: null becomes '' (avoids passing null to Normalizer) and the identity
    // check below can be strict, so numeric-looking values such as '1e3' and '1000' are not
    // treated as equal by PHP's loose comparison rules.
    $string1 = (string) $string1;
    $string2 = (string) $string2;

    if ($string1 === $string2) {
        return true;
    }

    $normalized1 = Normalizer::normalize($string1, Normalizer::FORM_KC);
    $normalized2 = Normalizer::normalize($string2, Normalizer::FORM_KC);

    // Normalizer::normalize() returns false on failure (for example malformed UTF-8). Two different
    // byte sequences that both fail must not be reported as equal just because both became false.
    if ($normalized1 === false || $normalized2 === false) {
        return false;
    }

    // Normalizer operates on UTF-8, so pin the mbstring encoding rather than relying on the
    // configured internal encoding.
    return mb_strtolower($normalized1, 'UTF-8') === mb_strtolower($normalized2, 'UTF-8')
        || mb_strtoupper($normalized1, 'UTF-8') === mb_strtoupper($normalized2, 'UTF-8');
}
8 changes: 7 additions & 1 deletion src/Validators/CoverageValidator.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,13 @@ function (Validator $context) {
$previousNames = $patient['previous_names'];
$found = false;
foreach ($previousNames as $previousName) {
if ($previousName['previous_name_first'] == $values['subscriber_fname'] && $previousName['previous_name_last'] == $values['subscriber_lname']) {
// Compare the names case-insensitively after Unicode (NFKC) normalization so that
// localized multibyte names match regardless of letter case or composed/decomposed form.
// Background on why normalization is needed: https://stackoverflow.com/a/38855868
if (
mb_is_string_equal_ci($previousName['previous_name_first'], $values['subscriber_fname'])
&& mb_is_string_equal_ci($previousName['previous_name_last'], $values['subscriber_lname'])
) {
$found = true;
break;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

/**
* Test cases for the sanitize.inc.php mb_is_string_equal_ci function
*
* @package OpenEMR
* @link http://www.open-emr.org
* @author Stephen Nielson <[email protected]>
* @copyright Copyright (c) 2024 Discover and Change, Inc. <[email protected]>
* @license https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3
*/

namespace OpenEMR\Tests\Unit\library;

use PHPUnit\Framework\TestCase;

/**
 * Exercises mb_is_string_equal_ci() with identical, normalized, case-differing, differing,
 * empty and large inputs.
 *
 * Unicode fixtures that depend on a specific composed/decomposed form are written with explicit
 * "\u{...}" escapes so that editors, version control or copy/paste cannot silently normalize or
 * strip the combining characters and turn the test into a no-op.
 */
class SanitizeMultiByteCaseInsensitiveStringEqualsTest extends TestCase
{
    public function testIdenticalStrings(): void
    {
        $this->assertTrue(mb_is_string_equal_ci('Hello', 'Hello'));
    }

    public function testComposedCharacters(): void
    {
        // Decomposed sequences that normalize to the same precomposed form under NFKC
        // e + combining acute accent (U+0065 U+0301) vs é (U+00E9)
        $this->assertTrue(mb_is_string_equal_ci("e\u{0301}", "\u{00E9}"));
        // o + combining diaeresis (U+006F U+0308) vs ö (U+00F6)
        $this->assertTrue(mb_is_string_equal_ci("o\u{0308}", "\u{00F6}"));
    }

    public function testDecomposedCharacters(): void
    {
        // Decomposed form of Ä: A + combining diaeresis (U+0041 U+0308) vs precomposed Ä (U+00C4)
        $this->assertTrue(mb_is_string_equal_ci("A\u{0308}", "\u{00C4}"));
    }

    public function testCaseInsensitivity(): void
    {
        // Characters that are different in case but should be equal after case mapping
        $this->assertTrue(mb_is_string_equal_ci('abc', 'ABC'));
        // German eszett ß (U+00DF) vs SS
        $this->assertTrue(mb_is_string_equal_ci("\u{00DF}", 'SS'));
        // Ä (U+00C4) vs ä (U+00E4)
        $this->assertTrue(mb_is_string_equal_ci("\u{00C4}", "\u{00E4}"));
    }

    public function testDifferentStrings(): void
    {
        $this->assertFalse(mb_is_string_equal_ci('hello', 'world'));
    }

    public function testEmptyStrings(): void
    {
        $this->assertTrue(mb_is_string_equal_ci('', ''));
    }

    public function testLargeStrings(): void
    {
        // Two large strings that differ only in case
        $string1 = str_repeat('a', 100000);
        $string2 = str_repeat('A', 100000);
        $this->assertTrue(mb_is_string_equal_ci($string1, $string2));
    }
}

0 comments on commit 31886ae

Please sign in to comment.