From 34ee237f36102cddc19f32d9f12f22a54f73ad87 Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Tue, 12 Dec 2023 12:03:25 +0100
Subject: [PATCH 1/7] Added implementation of nunique function

---
 docs/user-guide/advanced/Pandas_API.ipynb | 40 +++++++++++++++++++++++
 src/pykx/pandas_api/pandas_meta.py        |  9 +++++
 tests/test_pandas_api.py                  | 37 +++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index cb98590..cb2385c 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2507,6 +2507,46 @@
     "tab.prod(numeric_only=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "### Table.nunique()\n",
+    "```\n",
+    "Table.nunique(axis=0, dropna=True)\n",
+    "```\n",
+    "\n",
+    "Returns the number of unique elements across the given axis.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name         | Type | Description                                                                      | Default |\n",
+    "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
+    "| axis         | int  | The axis to calculate the sum across 0 is columns, 1 is rows.                    | 0       |\n",
+    "| dropna       | bool | Don’t include NaN in the counts.                                                 | True    |\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    " | Type               | Description                                                          |\n",
+    " | :----------------: | :------------------------------------------------------------------- |\n",
+    " | Dictionary         | A dictionary where the keys represent the column name / row number and the values are the result of calling `nunique` on that column / row. |"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "7b39a07bd7cd0af7"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "tab.nunique()"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "f5592b19b69ad46d"
+  },
   {
    "cell_type": "markdown",
    "id": "655c3ad2",
diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index d0e44ec..a164230 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -255,3 +255,12 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
             res,
             min_count
         ), cols)
+    
+    @convert_result
+    def nunique(self, axis=0, dropna=True):
+        res, cols = preparse_computations(self, axis, skipna=False)
+        filterNan = q('{$[11h = type x;x;'
+                 '0h = type x;(x where not null x except w),(w:x where 10h=type each x);'
+                 'x where not null x]}each')
+        res = filterNan(res) if dropna else res
+        return q('(\'[count;distinct]\')', res), cols
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index acfe55f..ddb0873 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2029,3 +2029,40 @@ def test_keyed_loc_fixes(q):
         mkt[['k1', 'y']]
     with pytest.raises(KeyError):
         mkt['k1']
+
+
+def test_nunique(kx, q):
+    df = pd.DataFrame(
+        {
+            'a': [1, 2, 2, 4],
+            'b': [1, 2, 6, 7],
+            'c': [7, 8, 9, 10],
+            'd': ['foo', 'baz', 'baz', 'qux']
+        }
+    )
+    tab = kx.toq(df)
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(axis=1)
+    q_m = tab.nunique(axis=1)
+    for c in range(len(tab)):
+        assert p_m[c] == q_m[c].py()
+
+    tab = kx.q('([]A:4 0n 7 6;B:4 0n 0n 7;C:``foo`foo`)')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(axis=1, dropna=False)
+    q_m = tab.nunique(axis=1, dropna=False)
+    for c in range(len(tab)):
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(dropna=False)
+    q_m = tab.nunique(dropna=False)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+
+

From 217cedf05fb88d00a987758da95e6a0131f31a9e Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Tue, 16 Jan 2024 13:09:01 +0100
Subject: [PATCH 2/7] Added test for handling strings nulls (" "),
 differentiating behavior between Python and kdb+

---
 tests/test_pandas_api.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index ddb0873..700077f 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2065,4 +2065,9 @@ def test_nunique(kx, q):
     for c in q.key(q_m).py():
         assert p_m[c] == q_m[c].py()
 
+    tab = kx.q('([]A:("";" ";"";"foo"))')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    assert p_m['A'] == 1 + q_m['A'].py()
 

From 1c5dbdaeda9461884bb65d1e3959624281989089 Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Tue, 16 Jan 2024 13:09:01 +0100
Subject: [PATCH 3/7] Added test for handling strings nulls (" "),
 differentiating behavior between Python and kdb+

---
 docs/user-guide/advanced/Pandas_API.ipynb | 10 +++++-----
 tests/test_pandas_api.py                  |  5 +++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index cb2385c..c66259a 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2519,10 +2519,10 @@
     "\n",
     "**Parameters:**\n",
     "\n",
-    "| Name         | Type | Description                                                                      | Default |\n",
-    "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n",
-    "| axis         | int  | The axis to calculate the sum across 0 is columns, 1 is rows.                    | 0       |\n",
-    "| dropna       | bool | Don’t include NaN in the counts.                                                 | True    |\n",
+    "| Name         | Type | Description                                                                         | Default |\n",
+    "| :----------: | :--: |:------------------------------------------------------------------------------------| :-----: |\n",
+    "| axis         | int  | The axis to calculate the number of unique elements across 0 is columns, 1 is rows. | 0       |\n",
+    "| dropna       | bool | Don’t include NaN in the counts.                                                    | True    |\n",
     "\n",
     "**Returns:**\n",
     "\n",
@@ -2533,7 +2533,7 @@
    "metadata": {
     "collapsed": false
    },
-   "id": "7b39a07bd7cd0af7"
+   "id": "5bc5e813e9673a84"
   },
   {
    "cell_type": "code",
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index ddb0873..700077f 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2065,4 +2065,9 @@ def test_nunique(kx, q):
     for c in q.key(q_m).py():
         assert p_m[c] == q_m[c].py()
 
+    tab = kx.q('([]A:("";" ";"";"foo"))')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    assert p_m['A'] == 1 + q_m['A'].py()
 

From 9fe428c3057d4d4e8d80a96bfd535271cae0bb46 Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Thu, 18 Jan 2024 08:39:42 +0100
Subject: [PATCH 4/7] Suggested changes. Error with mixed lists and tests for
 this case.

---
 src/pykx/pandas_api/pandas_meta.py |  9 ++++----
 tests/test_pandas_api.py           | 37 ++++++++++++------------------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 7bbaaec..e4698d1 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -260,11 +260,12 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
     @convert_result
     def nunique(self, axis=0, dropna=True):
         res, cols = preparse_computations(self, axis, skipna=False)
-        filterNan = q('{$[11h = type x;x;'
-                 '0h = type x;(x where not null x except w),(w:x where 10h=type each x);'
-                 'x where not null x]}each')
+        if q("any('[1<>count distinct@;type']')@", res).py():
+            raise NotImplementedError("Table contains a column whose type is mixed")
+        filterNan = q('{$[all[10h=type each x]|11h = type x;x;'
+                      'x where not null x]}each')
         res = filterNan(res) if dropna else res
-        return q('(\'[count;distinct]\')', res), cols
+        return q("('[count;distinct]')", res), cols
 
     def agg(self, func, axis=0, *args, **kwargs): # noqa: C901
         if 'KeyedTable' in str(type(self)):
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index 700077f..0c5f893 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2032,12 +2032,22 @@ def test_keyed_loc_fixes(q):
 
 
 def test_nunique(kx, q):
+    tab = kx.q('([]a:4 0n 7 6;b:4 0n 0n 7;c:``foo`foo`)')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(dropna=False)
+    q_m = tab.nunique(dropna=False)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+
     df = pd.DataFrame(
         {
             'a': [1, 2, 2, 4],
             'b': [1, 2, 6, 7],
             'c': [7, 8, 9, 10],
-            'd': ['foo', 'baz', 'baz', 'qux']
         }
     )
     tab = kx.toq(df)
@@ -2050,24 +2060,7 @@ def test_nunique(kx, q):
     for c in range(len(tab)):
         assert p_m[c] == q_m[c].py()
 
-    tab = kx.q('([]A:4 0n 7 6;B:4 0n 0n 7;C:``foo`foo`)')
-    df = tab.pd()
-    p_m = df.nunique()
-    q_m = tab.nunique()
-    for c in q.key(q_m).py():
-        assert p_m[c] == q_m[c].py()
-    p_m = df.nunique(axis=1, dropna=False)
-    q_m = tab.nunique(axis=1, dropna=False)
-    for c in range(len(tab)):
-        assert p_m[c] == q_m[c].py()
-    p_m = df.nunique(dropna=False)
-    q_m = tab.nunique(dropna=False)
-    for c in q.key(q_m).py():
-        assert p_m[c] == q_m[c].py()
-
-    tab = kx.q('([]A:("";" ";"";"foo"))')
-    df = tab.pd()
-    p_m = df.nunique()
-    q_m = tab.nunique()
-    assert p_m['A'] == 1 + q_m['A'].py()
-
+    tab = kx.q('([]a:("";" ";"";"foo"))')
+    with pytest.raises(NotImplementedError,
+                       match=r"Table contains a column whose type is mixed"):
+        raise tab.nunique()

From 0f04d8e8dee2c3bab97437b41a1e2fcc35df846f Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Thu, 18 Jan 2024 08:39:42 +0100
Subject: [PATCH 5/7] Suggested changes. Error with mixed lists and tests for
 this case.

---
 src/pykx/pandas_api/pandas_meta.py |  9 ++++----
 tests/test_pandas_api.py           | 37 ++++++++++++------------------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 7bbaaec..1ba34f4 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -260,11 +260,12 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
     @convert_result
     def nunique(self, axis=0, dropna=True):
         res, cols = preparse_computations(self, axis, skipna=False)
-        filterNan = q('{$[11h = type x;x;'
-                 '0h = type x;(x where not null x except w),(w:x where 10h=type each x);'
-                 'x where not null x]}each')
+        if q("any('[1<>count distinct@;type']')@", res).py():
+            raise NotImplementedError("Table contains a column whose type is mixed")
+        filterNan = q('{$[all[10h=type each x]|11h = type x;x;'
+                      'x where not null x]}each')
         res = filterNan(res) if dropna else res
-        return q('(\'[count;distinct]\')', res), cols
+        return (q("('[count;distinct]')", res), cols)
 
     def agg(self, func, axis=0, *args, **kwargs): # noqa: C901
         if 'KeyedTable' in str(type(self)):
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index 700077f..0c5f893 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2032,12 +2032,22 @@ def test_keyed_loc_fixes(q):
 
 
 def test_nunique(kx, q):
+    tab = kx.q('([]a:4 0n 7 6;b:4 0n 0n 7;c:``foo`foo`)')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(dropna=False)
+    q_m = tab.nunique(dropna=False)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+
     df = pd.DataFrame(
         {
             'a': [1, 2, 2, 4],
             'b': [1, 2, 6, 7],
             'c': [7, 8, 9, 10],
-            'd': ['foo', 'baz', 'baz', 'qux']
         }
     )
     tab = kx.toq(df)
@@ -2050,24 +2060,7 @@ def test_nunique(kx, q):
     for c in range(len(tab)):
         assert p_m[c] == q_m[c].py()
 
-    tab = kx.q('([]A:4 0n 7 6;B:4 0n 0n 7;C:``foo`foo`)')
-    df = tab.pd()
-    p_m = df.nunique()
-    q_m = tab.nunique()
-    for c in q.key(q_m).py():
-        assert p_m[c] == q_m[c].py()
-    p_m = df.nunique(axis=1, dropna=False)
-    q_m = tab.nunique(axis=1, dropna=False)
-    for c in range(len(tab)):
-        assert p_m[c] == q_m[c].py()
-    p_m = df.nunique(dropna=False)
-    q_m = tab.nunique(dropna=False)
-    for c in q.key(q_m).py():
-        assert p_m[c] == q_m[c].py()
-
-    tab = kx.q('([]A:("";" ";"";"foo"))')
-    df = tab.pd()
-    p_m = df.nunique()
-    q_m = tab.nunique()
-    assert p_m['A'] == 1 + q_m['A'].py()
-
+    tab = kx.q('([]a:("";" ";"";"foo"))')
+    with pytest.raises(NotImplementedError,
+                       match=r"Table contains a column whose type is mixed"):
+        raise tab.nunique()

From 2aa3a6e60235a670327163f1dad8b88d693f57cb Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Thu, 18 Jan 2024 12:05:15 +0100
Subject: [PATCH 6/7] QError for mixed lists (suggested by Kx)

---
 src/pykx/pandas_api/pandas_meta.py | 2 --
 tests/test_pandas_api.py           | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 1ba34f4..59de3d8 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -260,8 +260,6 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
     @convert_result
     def nunique(self, axis=0, dropna=True):
         res, cols = preparse_computations(self, axis, skipna=False)
-        if q("any('[1<>count distinct@;type']')@", res).py():
-            raise NotImplementedError("Table contains a column whose type is mixed")
         filterNan = q('{$[all[10h=type each x]|11h = type x;x;'
                       'x where not null x]}each')
         res = filterNan(res) if dropna else res
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index 0c5f893..e6e9891 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -2061,6 +2061,5 @@ def test_nunique(kx, q):
         assert p_m[c] == q_m[c].py()
 
     tab = kx.q('([]a:("";" ";"";"foo"))')
-    with pytest.raises(NotImplementedError,
-                       match=r"Table contains a column whose type is mixed"):
+    with pytest.raises(kx.QError):
         raise tab.nunique()

From 4aff510f97b4a5d80471fca423dcff5ecdcfaf26 Mon Sep 17 00:00:00 2001
From: chraberturas <christian.aberturas@hablapps.com>
Date: Mon, 22 Jan 2024 10:35:03 +0100
Subject: [PATCH 7/7] minor: rename filternan (suggested)

---
 src/pykx/pandas_api/pandas_meta.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 59de3d8..75805a3 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -260,9 +260,9 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
     @convert_result
     def nunique(self, axis=0, dropna=True):
         res, cols = preparse_computations(self, axis, skipna=False)
-        filterNan = q('{$[all[10h=type each x]|11h = type x;x;'
+        filternan = q('{$[all[10h=type each x]|11h = type x;x;'
                       'x where not null x]}each')
-        res = filterNan(res) if dropna else res
+        res = filternan(res) if dropna else res
         return (q("('[count;distinct]')", res), cols)
 
     def agg(self, func, axis=0, *args, **kwargs): # noqa: C901