Merge pull request openemr#7729 from stephenwaite/iss7477-rel-702

ins subscriber name validation fixes see openemr#7477 (openemr#7478) for rel-702
sjpadgett · Sep 18, 2024 · 31886ae · 31886ae
2 parents 0878c9b + 896e051
commit 31886ae
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 2 deletions.
diff --git a/composer.json b/composer.json
@@ -81,7 +81,8 @@
     },
     "config": {
         "platform": {
-            "php": "8.1"
+            "php": "8.1",
+            "ext-intl": "8.1"
         },
         "allow-plugins": {
             "openemr/oe-module-installer-plugin": true,

diff --git a/library/sanitize.inc.php b/library/sanitize.inc.php
@@ -177,3 +177,26 @@ function dateEmptySql($sqlColumn, $time = false, $rev = false)
 
     return $stat;
 }
+
+/**
+ * Case-insensitive, Unicode-aware equality check for identifiers (things such as proper nouns, etc).
+ *
+ * Identical strings match immediately. Otherwise both values are normalized to NFKC, which treats several
+ * 'similar' (compatibility-equivalent) texts as the same, and are then compared lower-cased and
+ * upper-cased so that one-to-many case mappings such as 'ß' <-> 'SS' also match.
+ *
+ * Values are compared as strings: null is treated as '' and numeric-looking strings are never compared
+ * numerically ('1e3' does not equal '1000').
+ *
+ * @see https://learn.microsoft.com/en-us/windows/win32/intl/using-unicode-normalization-to-represent-strings
+ *      Microsoft has a good explanation of string normalization here
+ * @see https://www.unicode.org/faq/normalization.html#2 for explanation on why we use NFKC
+ * @see https://stackoverflow.com/a/38855868
+ * @param string|null $string1
+ * @param string|null $string2
+ * @return bool true when the strings are equal; false when they differ, or when they are not identical
+ *              and either one cannot be normalized (e.g. it is not valid UTF-8)
+ */
+function mb_is_string_equal_ci($string1, $string2): bool
+{
+    // Work on real strings: avoids passing null to the normalizer and keeps the comparison below strict.
+    $string1 = (string) $string1;
+    $string2 = (string) $string2;
+
+    // Strict comparison on purpose: loose == would compare numeric strings by value.
+    if ($string1 === $string2) {
+        return true;
+    }
+
+    $string1_normalized = Normalizer::normalize($string1, Normalizer::FORM_KC);
+    $string2_normalized = Normalizer::normalize($string2, Normalizer::FORM_KC);
+
+    // normalize() returns false on failure; without this guard two failures would both be
+    // coerced to '' by the case helpers and be reported as equal.
+    if ($string1_normalized === false || $string2_normalized === false) {
+        return false;
+    }
+
+    // The normalizer always produces UTF-8, so do not depend on the configured mbstring internal encoding.
+    return mb_strtolower($string1_normalized, 'UTF-8') === mb_strtolower($string2_normalized, 'UTF-8')
+        || mb_strtoupper($string1_normalized, 'UTF-8') === mb_strtoupper($string2_normalized, 'UTF-8');
+}
diff --git a/src/Validators/CoverageValidator.php b/src/Validators/CoverageValidator.php
@@ -112,7 +112,13 @@ function (Validator $context) {
                                     $previousNames = $patient['previous_names'];
                                     $found = false;
                                     foreach ($previousNames as $previousName) {
-                                        if ($previousName['previous_name_first'] == $values['subscriber_fname'] && $previousName['previous_name_last'] == $values['subscriber_lname']) {
+                                        // compare the names case-insensitively and Unicode-aware: mb_is_string_equal_ci()
+                                        // tries a direct comparison first, then falls back to an NFKC-normalized multibyte
+                                        // comparison (the approach described in this stackoverflow post: https://stackoverflow.com/a/38855868)
+                                        if (
+                                            mb_is_string_equal_ci($previousName['previous_name_first'], $values['subscriber_fname'])
+                                            && mb_is_string_equal_ci($previousName['previous_name_last'], $values['subscriber_lname'])
+                                        ) {
                                             $found = true;
                                             break;
                                         }

diff --git a/tests/Tests/Unit/library/SanitizeMultiByteCaseInsensitiveStringEqualsTest.php b/tests/Tests/Unit/library/SanitizeMultiByteCaseInsensitiveStringEqualsTest.php
@@ -0,0 +1,60 @@
+<?php
+
+/**
+ * Test cases for the sanitize.inc.php mb_is_string_equal_ci function
+ *
+ * @package   OpenEMR
+ * @link      http://www.open-emr.org
+ * @author    Stephen Nielson <[email protected]>
+ * @copyright Copyright (c) 2024 Discover and Change, Inc. <[email protected]>
+ * @license   https://github.com/openemr/openemr/blob/master/LICENSE GNU General Public License 3
+ */
+
+namespace OpenEMR\Tests\Unit\library;
+
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Exercises mb_is_string_equal_ci() with identical, composed/decomposed, mixed-case, different,
+ * empty and large inputs.
+ *
+ * Non-ASCII fixtures whose exact code points matter are written with \u{...} escapes so the
+ * composed/decomposed distinction cannot be lost when a tool normalizes the file's text.
+ */
+class SanitizeMultiByteCaseInsensitiveStringEqualsTest extends TestCase
+{
+    public function testIdenticalStrings(): void
+    {
+        $this->assertTrue(mb_is_string_equal_ci('Hello', 'Hello'));
+    }
+
+    public function testComposedCharacters(): void
+    {
+        // Composed characters that normalize to the same form under NFKC
+        $this->assertTrue(mb_is_string_equal_ci("e\u{0301}", "\u{00E9}")); // e + combining acute accent vs é
+        $this->assertTrue(mb_is_string_equal_ci("o\u{0308}", "\u{00F6}")); // o + combining diaeresis vs ö
+    }
+
+    public function testDecomposedCharacters(): void
+    {
+        // Decomposed form of Ä: A + combining diaeresis (U+00C4 -> U+0041 U+0308)
+        $this->assertTrue(mb_is_string_equal_ci("\u{00C4}", "A\u{0308}"));
+    }
+
+    public function testCaseInsensitivity(): void
+    {
+        // Characters that are different in case but should be equal after case folding
+        $this->assertTrue(mb_is_string_equal_ci('abc', 'ABC'));
+        $this->assertTrue(mb_is_string_equal_ci("\u{00DF}", 'SS')); // German eszett (ß) vs SS
+        $this->assertTrue(mb_is_string_equal_ci("\u{00C4}", "\u{00E4}")); // Ä vs ä
+    }
+
+    public function testDifferentStrings(): void
+    {
+        $this->assertFalse(mb_is_string_equal_ci('hello', 'world'));
+    }
+
+    public function testEmptyStrings(): void
+    {
+        $this->assertTrue(mb_is_string_equal_ci('', ''));
+    }
+
+    public function testLargeStrings(): void
+    {
+        // Large inputs that differ only in case
+        $string1 = str_repeat('a', 100000);
+        $string2 = str_repeat('A', 100000);
+        $this->assertTrue(mb_is_string_equal_ci($string1, $string2));
+    }
+}