From 6eaee34939e45406093cb27a4cadbd9a2a39df09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcos=20V=C3=A1zquez?= Date: Mon, 18 Dec 2023 13:41:24 +0100 Subject: [PATCH 01/11] Add count and add prefix and sufix implementation --- docs/user-guide/advanced/Pandas_API.ipynb | 312 +++++++++++++++++++++- src/pykx/pandas_api/pandas_indexing.py | 22 ++ src/pykx/pandas_api/pandas_meta.py | 5 + tests/test_pandas_api.py | 133 +++++++++ 4 files changed, 471 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 239c4c8..22e8772 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -646,6 +646,110 @@ "tab.mode(dropna=False)" ] }, + { + "cell_type": "markdown", + "id": "f5c66579", + "metadata": {}, + "source": [ + "### Table.std()\n", + "\n", + "```\n", + "Table.std(axis=0, skipna=True, numeric_only=False, ddof=0)\n", + "```\n", + "\n", + "Return sample standard deviation over requested axis. Normalized by N-1 by default. This can be changed using the ddof argument.\n", + "\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the sum across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | not yet implemented | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "| ddof | int | Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. | 1 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | The std across each row / column with the key corresponding to the row number or column name. |" + ] + }, + { + "cell_type": "markdown", + "id": "c2767afd", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Calculate the std across the columns of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87b94fd0", + "metadata": {}, + "outputs": [], + "source": [ + "tab = kx.Table(data=\n", + " {\n", + " 'a': [1, 2, 2, 4],\n", + " 'b': [1, 2, 6, 7],\n", + " 'c': [7, 8, 9, 10],\n", + " 'd': [7, 11, 14, 14]\n", + " }\n", + ")\n", + "tab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e54d557", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std()" + ] + }, + { + "cell_type": "markdown", + "id": "14950833", + "metadata": {}, + "source": [ + "Calculate the std across the rows of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f19161ed", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std(axis=2)" + ] + }, + { + "cell_type": "markdown", + "id": "a8ea5a38", + "metadata": {}, + "source": [ + "Calculate std accross columns with ddof=0:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6361dcb7", + "metadata": {}, + "outputs": [], + "source": [ + "tab.std(ddof=0)" + ] + }, { "cell_type": "markdown", "id": "7e2813b4", @@ -1813,6 +1917,136 @@ "df.astype({'c4':kx.SymbolVector, 'c5':kx.SymbolVector})" ] }, + { + "cell_type": "markdown", + "id": "0f8813a0", + "metadata": {}, + "source": [ + "### Table.add_prefix()\n", + "\n", + "```\n", + "Table.add_prefix(columns)\n", + "```\n", + "\n", + "Rename columns adding a prefix in a table and return the resulting Table object.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", + "| prefix | str | The string that will be concatenated with the name of the columns | _required_ |\n", + "| axis | int | Axis to add prefix on. | 0 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given column(s) renamed adding a prefix. |" + ] + }, + { + "cell_type": "markdown", + "id": "9186ed86", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "he initial table to which a prefix will be added to its columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f20131b", + "metadata": {}, + "outputs": [], + "source": [ + "tab.head()" + ] + }, + { + "cell_type": "markdown", + "id": "73c2b08f", + "metadata": {}, + "source": [ + "Add \"col_\" to table columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "926c8295", + "metadata": {}, + "outputs": [], + "source": [ + "tab.add_prefix(prefix=\"col_\").head()" + ] + }, + { + "cell_type": "markdown", + "id": "0a4abc8c", + "metadata": {}, + "source": [ + "### Table.add_sufix()\n", + "\n", + "```\n", + "Table.add_sufix(columns)\n", + "```\n", + "\n", + "Rename columns adding a sufix in a table and return the resulting Table object.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", + "| sufix | str | The string that will be concatenated with the name of the columns | _required_ |\n", + "| axis | int | Axis to add sufix on. | 0 |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :---: | :----------------------------------------------------------------- |\n", + "| Table | A table with the given column(s) renamed adding a sufix. |" + ] + }, + { + "cell_type": "markdown", + "id": "c22262b8", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "he initial table to which a sufix will be added to its columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55c1f504", + "metadata": {}, + "outputs": [], + "source": [ + "tab.head()" + ] + }, + { + "cell_type": "markdown", + "id": "b4687851", + "metadata": {}, + "source": [ + "Add \"_col\" to table columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e00d0f5c", + "metadata": {}, + "outputs": [], + "source": [ + "tab.add_sufix(sufix=\"_col\").head()" + ] + }, { "cell_type": "markdown", "id": "718584f8", @@ -2507,6 +2741,82 @@ "tab.prod(numeric_only=True)" ] }, + { + "cell_type": "markdown", + "id": "c87d4f95", + "metadata": {}, + "source": [ + "### Table.count()\n", + "\n", + "```\n", + "Table.count(axis=0, numeric_only=False)\n", + "```\n", + "\n", + "Returns the count of non-NA values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to count elements across 1 is columns, 0 is rows. | 0 |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `count` on that column / row. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6520c195", + "metadata": {}, + "outputs": [], + "source": [ + "tab.count()" + ] + }, + { + "cell_type": "markdown", + "id": "ce85797d", + "metadata": {}, + "source": [ + "### Table.skew()\n", + "\n", + "```\n", + "Table.skew(axis=0, skipna=True, numeric_only=False)\n", + "```\n", + "\n", + "Returns the skewness of all values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the skewness across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `skew` on that column / row. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fb5dce1", + "metadata": {}, + "outputs": [], + "source": [ + "tab.skew(numeric_only=True)" + ] + }, { "cell_type": "markdown", "id": "499025cb", @@ -3032,7 +3342,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index 954896d..1e96f0c 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ b/src/pykx/pandas_api/pandas_indexing.py @@ -453,6 +453,28 @@ def rename(self, labels=None, index=None, columns=None, axis=0, t = _rename_columns(t, columns) return t + + def add_suffix(self, suffix, axis=0): + t = self + if axis == 1: + c_str = 'cols value' if "Keyed" in str(type(t)) else 'cols' + t = q(f'{{(c!`$string[c:{c_str} y],\:string x)xcol y}}', suffix, t) + elif axis == 0: + raise ValueError('nyi') + else: + raise ValueError(f'No axis named {axis}') + return t + + def add_prefix(self, prefix, axis=0): + t = self + if axis == 1: + c_str = 'cols value' if "Keyed" in str(type(t)) else 'cols' + t = q(f'{{(c!`$string[x],/:string c:{c_str} y)xcol y}}', prefix, t) + elif axis == 0: + raise ValueError('nyi') + else: + raise ValueError(f'No axis named {axis}') + return t def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None, ignore_index=False): diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 39668d5..248c142 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -311,3 +311,8 @@ def agg(self, func, axis=0, *args, **kwargs): # noqa: C901 return data else: return (q('{(flip enlist[`function]!enlist x)!y}', keyname, data)) + + @convert_result + def count(self, axis=0, numeric_only=False): + res, cols = preparse_computations(self, axis, True, numeric_only) + return (q('count each', res), cols) \ No newline at end of file diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index acfe55f..d7bf27f 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2029,3 +2029,136 @@ def test_keyed_loc_fixes(q): mkt[['k1', 'y']] with pytest.raises(KeyError): mkt['k1'] + + +def test_pandas_count(q): + tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))') + df = tab.pd() + + # Assert axis = 1 + qcount = tab.count(axis=1).py() + pcount = df.count(axis=1) + + print(pcount) + assert int(qcount[0]) == int(pcount[0]) + assert int(qcount[1]) == 1 + + # Assert axis = 0 + qcount = tab.count().py() + pcount = df.count() + + assert int(qcount["k1"]) == int(pcount["k1"]) + assert int(qcount["k2"]) == 3 + + # Assert only numeric + qcount = tab.count(numeric_only=True).py() + pcount = df.count(numeric_only=True) + + assert int(qcount["k1"]) == int(pcount["k1"]) + + +def test_df_add_prefix(kx, q): + q('sym:`aaa`bbb`ccc') + t = q('([] 10?sym; til 10; 10?10; 10?1f)') + + q_add_prefix = t.add_prefix("col_", axis=1) + + assert(q('{x~y}', q_add_prefix, t.pd().add_prefix("col_"))) + + kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")') + + q_add_prefix = kt.add_prefix("col_", axis=1) + assert(q('{x~y}', q_add_prefix, kt.pd().add_prefix("col_"))) + + with pytest.raises(ValueError): + t.add_prefix("col_", axis=0) + + +def test_df_add_suffix(kx, q): + q('sym:`aaa`bbb`ccc') + t = q('([] 10?sym; til 10; 10?10; 10?1f)') + + q_add_suffix = t.add_suffix("_col", axis=1) + + assert(q('{x~y}', q_add_suffix, t.pd().add_suffix("_col"))) + + kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")') + + q_add_suffix = kt.add_suffix("_col", axis=1) + assert(q('{x~y}', q_add_suffix, kt.pd().add_suffix("_col"))) + + with pytest.raises(ValueError): + t.add_suffix("_col", axis=0) + + +def test_pandas_skew(q): + tab = q('([] price: 250.0f - 100?500.0f; ints: 100 - 100?200)') + df = tab.pd() + + qskew = tab.skew().py() + pskew = df.skew() + assert round(float(qskew['price']), 6) == round(float(pskew['price']), 6) + assert round(float(qskew['ints']), 6) == round(float(pskew['ints']), 6) + + +def test_std(kx, q): + df = pd.DataFrame( + { + 'a': [1, 2, 2, 4], + 'b': [1, 2, 6, 7], + 'c': [7, 8, 9, 10], + 'd': [7, 11, 14, 14] + } + ) + tab = kx.toq(df) + p_m = df.std() + q_m = tab.std() + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + p_m = df.std(axis=1) + q_m = tab.std(axis=1) + for c in range(len(q.cols(tab))): + assert p_m[c] == q_m[q('{`$string x}', c)].py() + p_m = df.std(ddof=0) + q_m = tab.std(ddof=0) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + p_m = df.std(ddof=4) + q_m = tab.std(ddof=4) + for c in q.key(q_m).py(): + assert np.isnan(p_m[c]) == np.isnan(q_m[c].py()) + + q['tab'] = kx.toq(df) + tab = q('1!`idx xcols update idx: til count tab from tab') + p_m = df.std() + q_m = tab.std() + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + p_m = df.std(axis=1) + q_m = tab.std(axis=1) + for c in range(len(q.cols(tab)) - 1): + assert p_m[c] == q_m[q('{`$string x}', c)].py() + + df = pd.DataFrame( + { + 'a': [1, 2, 2, 4], + 'b': [1, 2, 6, 7], + 'c': [7, 8, 9, 10], + 'd': ['foo', 'bar', 'baz', 'qux'] + } + ) + tab = kx.toq(df) + p_m = df.std(numeric_only=True) + q_m = tab.std(numeric_only=True) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + p_m = df.std(axis=1, numeric_only=True) + q_m = tab.std(axis=1, numeric_only=True) + for c in range(len(q.cols(tab))): + assert p_m[c] == q_m[q('{`$string x}', c)].py() + + with pytest.raises(kx.QError): + q_m = tab.std() + with pytest.raises(kx.QError): + q_m = tab.std(axis=1) From f9f396fb0f3dd5b46ecb934c1ce089798c6e4b21 Mon Sep 17 00:00:00 2001 From: marcosvm13 <61940472+marcosvm13@users.noreply.github.com> Date: Mon, 18 Dec 2023 13:53:27 +0100 Subject: [PATCH 02/11] Update version Pandas API Notebook --- docs/user-guide/advanced/Pandas_API.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 22e8772..39b1e96 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -3342,7 +3342,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.8.3" } }, "nbformat": 4, From 531fc8555dab8c7e4b7e0270f4931e5b86a9ddf1 Mon Sep 17 00:00:00 2001 From: Oscar Nydza Date: Mon, 18 Dec 2023 16:27:21 +0100 Subject: [PATCH 03/11] Added skew and std implementations --- src/pykx/pandas_api/pandas_meta.py | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 248c142..2e981f1 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -154,6 +154,31 @@ def mean(self, axis: int = 0, numeric_only: bool = False): tab ) + @api_return + def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): + tab = self + if 'Keyed' in str(type(tab)): + tab = q('{(keys x) _ 0!x}', tab) + if numeric_only: + tab = _get_numeric_only_subtable(tab) + + key_str = '' if axis == 0 else '`$string ' + val_str = '' if axis == 0 else '"f"$value ' + query_str = 'cols[tab]' if axis == 0 else 'til[count[tab]]' + where_str = ' where not (::)~/:r[;1]' + x_dev_str = f'{{avg sqrt (sum xexp[x-avg x;2]) % count[x]-{ddof}}}' + dev_str = 'dev' if ddof == 0 else 'sdev' if ddof == 1 else x_dev_str + + if ddof == len(tab): + return q(f'{{[tab]{query_str}!count[{query_str}]#0n}}', tab) + + return q( + '{[tab]' + f'r:{{[tab; x] ({key_str}x; {dev_str} {val_str}tab[x])}}[tab;] each {query_str};' + f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}', + tab + ) + @api_return def median(self, axis: int = 0, numeric_only: bool = False): tab = self @@ -245,6 +270,18 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): min_count ), cols) + @convert_result + def skew(self, axis=0, skipna=True, numeric_only=False): + res, cols = preparse_computations(self, axis, skipna, numeric_only) + return (q( + '{[row]' + # adjusted Fisher-Pearson standardized moment + 'm:{(sum (x - avg x) xexp y) % count x};' + 'g1:{[m;x]m:m[x]; m[3] % m[2] xexp 3%2}[m];' + '{[g1;x]g1[x] * sqrt[n * n-1] % neg[2] + n:count x}[g1] each row}', + res + ), cols) + @convert_result def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): res, cols = preparse_computations(self, axis, skipna, numeric_only) From 27f685ff8976e86b62a624373d42a1bde334eb89 Mon Sep 17 00:00:00 2001 From: marcosvm13 <61940472+marcosvm13@users.noreply.github.com> Date: Mon, 18 Dec 2023 17:00:10 +0100 Subject: [PATCH 04/11] fix typo --- docs/user-guide/advanced/Pandas_API.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 39b1e96..05f3115 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -1951,7 +1951,7 @@ "source": [ "**Examples:**\n", "\n", - "he initial table to which a prefix will be added to its columns" + "The initial table to which a prefix will be added to its columns" ] }, { @@ -2016,7 +2016,7 @@ "source": [ "**Examples:**\n", "\n", - "he initial table to which a sufix will be added to its columns" + "The initial table to which a sufix will be added to its columns" ] }, { From 91caa667ff414c605791d19e8aa1e42b11cc794d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20L=C3=B3pez-Gonz=C3=A1lez?= Date: Tue, 19 Dec 2023 08:56:23 +0100 Subject: [PATCH 05/11] Extends test cases for skew --- tests/test_pandas_api.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index d7bf27f..7d17077 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2094,12 +2094,32 @@ def test_df_add_suffix(kx, q): def test_pandas_skew(q): tab = q('([] price: 250.0f - 100?500.0f; ints: 100 - 100?200)') df = tab.pd() - qskew = tab.skew().py() pskew = df.skew() assert round(float(qskew['price']), 6) == round(float(pskew['price']), 6) assert round(float(qskew['ints']), 6) == round(float(pskew['ints']), 6) + tab = q('^', q('([]sym:100?`foo`bar`baz`qux)'), tab) + df = tab.pd() + qskew = tab.skew(numeric_only=True).py() + pskew = df.skew(numeric_only=True) + assert round(float(qskew['price']), 6) == round(float(pskew['price']), 6) + assert round(float(qskew['ints']), 6) == round(float(pskew['ints']), 6) + + tab = q('^', q('([]foo:(5#0n),95?500.0f)'), tab) + df = tab.pd() + qskew = tab.skew(numeric_only=True, skipna=True).py() + pskew = df.skew(numeric_only=True, skipna=True) + assert round(float(qskew['foo']), 6) == round(float(pskew['foo']), 6) + + tab = q('_', 5, tab) # discard rows with null "foo"s + df = tab.pd() + qskew = tab.skew(numeric_only=True, axis=1).py() + pskew = df.skew(numeric_only=True, axis=1) + print(q('~', qskew, pskew)) + for r in range(len(qskew)): + assert round(float(qskew[r]), 6) == round(float(pskew[r]), 6) + def test_std(kx, q): df = pd.DataFrame( From 348917b0637892949f6f5fc656bb8d9f14644027 Mon Sep 17 00:00:00 2001 From: marcosvm13 <61940472+marcosvm13@users.noreply.github.com> Date: Tue, 19 Dec 2023 09:38:32 +0100 Subject: [PATCH 06/11] fix problems in test pandas API --- tests/test_pandas_api.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index 7d17077..ba5a1fb 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2035,22 +2035,18 @@ def test_pandas_count(q): tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))') df = tab.pd() - # Assert axis = 1 qcount = tab.count(axis=1).py() pcount = df.count(axis=1) - - print(pcount) + assert int(qcount[0]) == int(pcount[0]) assert int(qcount[1]) == 1 - # Assert axis = 0 qcount = tab.count().py() pcount = df.count() assert int(qcount["k1"]) == int(pcount["k1"]) assert int(qcount["k2"]) == 3 - # Assert only numeric qcount = tab.count(numeric_only=True).py() pcount = df.count(numeric_only=True) @@ -2058,38 +2054,40 @@ def test_pandas_count(q): def test_df_add_prefix(kx, q): - q('sym:`aaa`bbb`ccc') - t = q('([] 10?sym; til 10; 10?10; 10?1f)') + t = q('([] til 5; 5?5; 5?1f; (5;5)#100?" ")') q_add_prefix = t.add_prefix("col_", axis=1) - assert(q('{x~y}', q_add_prefix, t.pd().add_prefix("col_"))) + assert(q('~', q_add_prefix, t.pd().add_prefix("col_", axis=1))) kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")') q_add_prefix = kt.add_prefix("col_", axis=1) - assert(q('{x~y}', q_add_prefix, kt.pd().add_prefix("col_"))) + assert(q('~', q_add_prefix, kt.pd().add_prefix("col_", axis=1))) with pytest.raises(ValueError): t.add_prefix("col_", axis=0) - + + with pytest.raises(ValueError): + t.add_suffix("col_", axis=3) def test_df_add_suffix(kx, q): - q('sym:`aaa`bbb`ccc') - t = q('([] 10?sym; til 10; 10?10; 10?1f)') + t = q('([] til 5; 5?5; 5?1f; (5;5)#100?" ")') q_add_suffix = t.add_suffix("_col", axis=1) - assert(q('{x~y}', q_add_suffix, t.pd().add_suffix("_col"))) + assert(q('~', q_add_suffix, t.pd().add_suffix("_col", axis=1))) kt = kx.q('([idx:til 5] til 5; 5?5; 5?1f; (5;5)#100?" ")') q_add_suffix = kt.add_suffix("_col", axis=1) - assert(q('{x~y}', q_add_suffix, kt.pd().add_suffix("_col"))) + assert(q('~', q_add_suffix, kt.pd().add_suffix("_col", axis=1))) with pytest.raises(ValueError): t.add_suffix("_col", axis=0) - + + with pytest.raises(ValueError): + t.add_suffix("_col", axis=3) def test_pandas_skew(q): tab = q('([] price: 250.0f - 100?500.0f; ints: 100 - 100?200)') From 6127e3b48fab3433273b58b4037d5ae3a1ac803b Mon Sep 17 00:00:00 2001 From: Oscar Nydza Date: Wed, 20 Dec 2023 09:52:27 +0100 Subject: [PATCH 07/11] Applied comment for std function --- src/pykx/pandas_api/pandas_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 2e981f1..7146893 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -158,7 +158,7 @@ def mean(self, axis: int = 0, numeric_only: bool = False): def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): tab = self if 'Keyed' in str(type(tab)): - tab = q('{(keys x) _ 0!x}', tab) + tab = q('value', tab) if numeric_only: tab = _get_numeric_only_subtable(tab) From a1ac6d84964f46ae76b7a07603c889d34f44be12 Mon Sep 17 00:00:00 2001 From: Oscar Nydza Date: Wed, 20 Dec 2023 11:21:08 +0100 Subject: [PATCH 08/11] Changed value function call to q.value() --- src/pykx/pandas_api/pandas_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 7146893..95cec79 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -158,7 +158,7 @@ def mean(self, axis: int = 0, numeric_only: bool = False): def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): tab = self if 'Keyed' in str(type(tab)): - tab = q('value', tab) + tab = q.value(tab) if numeric_only: tab = _get_numeric_only_subtable(tab) From 8e58d37e1164defd860d7e92141bc295f2373c1c Mon Sep 17 00:00:00 2001 From: marcosvm13 <61940472+marcosvm13@users.noreply.github.com> Date: Wed, 20 Dec 2023 12:26:52 +0100 Subject: [PATCH 09/11] Add error validation to test --- tests/test_pandas_api.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index ba5a1fb..ae65874 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2065,11 +2065,14 @@ def test_df_add_prefix(kx, q): q_add_prefix = kt.add_prefix("col_", axis=1) assert(q('~', q_add_prefix, kt.pd().add_prefix("col_", axis=1))) - with pytest.raises(ValueError): + with pytest.raises(ValueError) as err: t.add_prefix("col_", axis=0) + assert 'nyi' in str(err) - with pytest.raises(ValueError): - t.add_suffix("col_", axis=3) + with pytest.raises(ValueError) as err: + t.add_prefix("col_", axis=3) + assert 'No axis named 3' in str(err) + def test_df_add_suffix(kx, q): t = q('([] til 5; 5?5; 5?1f; (5;5)#100?" ")') @@ -2083,11 +2086,14 @@ def test_df_add_suffix(kx, q): q_add_suffix = kt.add_suffix("_col", axis=1) assert(q('~', q_add_suffix, kt.pd().add_suffix("_col", axis=1))) - with pytest.raises(ValueError): + with pytest.raises(ValueError) as err: t.add_suffix("_col", axis=0) - - with pytest.raises(ValueError): + assert 'nyi' in str(err) + + with pytest.raises(ValueError) as err: t.add_suffix("_col", axis=3) + assert 'No axis named 3' in str(err) + def test_pandas_skew(q): tab = q('([] price: 250.0f - 100?500.0f; ints: 100 - 100?200)') From 689100cd46529085b8a772661473e7085106fe80 Mon Sep 17 00:00:00 2001 From: marcosvm13 <61940472+marcosvm13@users.noreply.github.com> Date: Wed, 20 Dec 2023 13:04:56 +0100 Subject: [PATCH 10/11] Update typos in documentation --- docs/user-guide/advanced/Pandas_API.ipynb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 05f3115..56c8d38 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -673,7 +673,7 @@ "\n", "| Type | Description |\n", "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | The std across each row / column with the key corresponding to the row number or column name. |" + "| Table | The std across each row / column with the key corresponding to the row number or column name. |" ] }, { @@ -729,7 +729,7 @@ "metadata": {}, "outputs": [], "source": [ - "tab.std(axis=2)" + "tab.std(axis=1)" ] }, { @@ -1987,26 +1987,26 @@ "id": "0a4abc8c", "metadata": {}, "source": [ - "### Table.add_sufix()\n", + "### Table.add_suffix()\n", "\n", "```\n", - "Table.add_sufix(columns)\n", + "Table.add_suffix(columns)\n", "```\n", "\n", - "Rename columns adding a sufix in a table and return the resulting Table object.\n", + "Rename columns adding a suffix in a table and return the resulting Table object.\n", "\n", "**Parameters:**\n", "\n", "| Name | Type | Description | Default |\n", "| :-----: | :-------------: | :------------------------------------------------------------------ | :--------: |\n", - "| sufix | str | The string that will be concatenated with the name of the columns | _required_ |\n", - "| axis | int | Axis to add sufix on. | 0 |\n", + "| suffix | str | The string that will be concatenated with the name of the columns | _required_ |\n", + "| axis | int | Axis to add suffix on. | 0 |\n", "\n", "**Returns:**\n", "\n", "| Type | Description |\n", "| :---: | :----------------------------------------------------------------- |\n", - "| Table | A table with the given column(s) renamed adding a sufix. |" + "| Table | A table with the given column(s) renamed adding a suffix. |" ] }, { @@ -2016,7 +2016,7 @@ "source": [ "**Examples:**\n", "\n", - "The initial table to which a sufix will be added to its columns" + "The initial table to which a suffix will be added to its columns" ] }, { @@ -2044,7 +2044,7 @@ "metadata": {}, "outputs": [], "source": [ - "tab.add_sufix(sufix=\"_col\").head()" + "tab.add_suffix(suffix=\"_col\").head()" ] }, { @@ -2752,7 +2752,7 @@ "Table.count(axis=0, numeric_only=False)\n", "```\n", "\n", - "Returns the count of non-NA values across the given axis.\n", + "Returns the count of non null values across the given axis.\n", "\n", "**Parameters:**\n", "\n", From 004f7853b28ba1a6eef3f7224828cfeb136157d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rian=20=C3=93=20Cuinneag=C3=A1in?= Date: Tue, 16 Jan 2024 19:53:11 +0000 Subject: [PATCH 11/11] Refactor q code. pflake8 fixes. --- src/pykx/pandas_api/pandas_indexing.py | 14 +++++++---- src/pykx/pandas_api/pandas_meta.py | 34 ++++++++++++-------------- tests/test_pandas_api.py | 6 ++--- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index 1e96f0c..e3dfeb0 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ b/src/pykx/pandas_api/pandas_indexing.py @@ -453,12 +453,14 @@ def rename(self, labels=None, index=None, columns=None, axis=0, t = _rename_columns(t, columns) return t - + def add_suffix(self, suffix, axis=0): t = self if axis == 1: - c_str = 'cols value' if "Keyed" in str(type(t)) else 'cols' - t = q(f'{{(c!`$string[c:{c_str} y],\:string x)xcol y}}', suffix, t) + t = q('''{[s;t] + c:$[99h~type t;cols value@;cols] t; + (c!`$string[c],\\:string s) xcol t + }''', suffix, t) elif axis == 0: raise ValueError('nyi') else: @@ -468,8 +470,10 @@ def add_suffix(self, suffix, axis=0): def add_prefix(self, prefix, axis=0): t = self if axis == 1: - c_str = 'cols value' if "Keyed" in str(type(t)) else 'cols' - t = q(f'{{(c!`$string[x],/:string c:{c_str} y)xcol y}}', prefix, t) + t = q('''{[s;t] + c:$[99h~type t;cols value@;cols] t; + (c!`$string[s],/:string[c]) xcol t + }''', prefix, t) elif axis == 0: raise ValueError('nyi') else: diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 95cec79..64a9af0 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -162,21 +162,19 @@ def std(self, axis: int = 0, ddof: int = 1, numeric_only: bool = False): if numeric_only: tab = _get_numeric_only_subtable(tab) - key_str = '' if axis == 0 else '`$string ' - val_str = '' if axis == 0 else '"f"$value ' - query_str = 'cols[tab]' if axis == 0 else 'til[count[tab]]' - where_str = ' where not (::)~/:r[;1]' - x_dev_str = f'{{avg sqrt (sum xexp[x-avg x;2]) % count[x]-{ddof}}}' - dev_str = 'dev' if ddof == 0 else 'sdev' if ddof == 1 else x_dev_str + axis_keys = q('{[axis;tab] $[0~axis;cols;`$string til count @] tab}', axis, tab) if ddof == len(tab): - return q(f'{{[tab]{query_str}!count[{query_str}]#0n}}', tab) + return q('{x!count[x]#0n}', axis_keys) return q( - '{[tab]' - f'r:{{[tab; x] ({key_str}x; {dev_str} {val_str}tab[x])}}[tab;] each {query_str};' - f'(,/) {{(enlist x 0)!(enlist x 1)}} each r{where_str}}}', - tab + '''{[tab;axis;ddof;axis_keys] + tab:$[0~axis;(::);flip] value flip tab; + d:$[0~ddof;dev; + 1~ddof;sdev; + {[ddof;x] avg sqrt (sum xexp[x-avg x;2]) % count[x]-ddof}ddof]; + axis_keys!d each tab + }''', tab, axis, ddof, axis_keys ) @api_return @@ -274,13 +272,11 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): def skew(self, axis=0, skipna=True, numeric_only=False): res, cols = preparse_computations(self, axis, skipna, numeric_only) return (q( - '{[row]' - # adjusted Fisher-Pearson standardized moment - 'm:{(sum (x - avg x) xexp y) % count x};' - 'g1:{[m;x]m:m[x]; m[3] % m[2] xexp 3%2}[m];' - '{[g1;x]g1[x] * sqrt[n * n-1] % neg[2] + n:count x}[g1] each row}', - res - ), cols) + '''{[row] + m:{(sum (x - avg x) xexp y) % count x}; + g1:{[m;x]m:m[x]; m[3] % m[2] xexp 3%2}[m]; + (g1 each row) * {sqrt[n * n-1] % neg[2] + n:count x} each row + }''', res), cols) @convert_result def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): @@ -352,4 +348,4 @@ def agg(self, func, axis=0, *args, **kwargs): # noqa: C901 @convert_result def count(self, axis=0, numeric_only=False): res, cols = preparse_computations(self, axis, True, numeric_only) - return (q('count each', res), cols) \ No newline at end of file + return (q('count each', res), cols) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index ae65874..5bde38e 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2037,7 +2037,7 @@ def test_pandas_count(q): qcount = tab.count(axis=1).py() pcount = df.count(axis=1) - + assert int(qcount[0]) == int(pcount[0]) assert int(qcount[1]) == 1 @@ -2068,7 +2068,7 @@ def test_df_add_prefix(kx, q): with pytest.raises(ValueError) as err: t.add_prefix("col_", axis=0) assert 'nyi' in str(err) - + with pytest.raises(ValueError) as err: t.add_prefix("col_", axis=3) assert 'No axis named 3' in str(err) @@ -2089,7 +2089,7 @@ def test_df_add_suffix(kx, q): with pytest.raises(ValueError) as err: t.add_suffix("_col", axis=0) assert 'nyi' in str(err) - + with pytest.raises(ValueError) as err: t.add_suffix("_col", axis=3) assert 'No axis named 3' in str(err)