Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds pandas API round function #35

Open
wants to merge 2 commits into
base: feature/pandas-api-3rd-block
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion docs/user-guide/advanced/Pandas_API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2921,7 +2921,69 @@
},
{
"cell_type": "markdown",
"id": "4e6fad4f",
"id": "0c056fd9-fe7b-43d5-b1c7-7ceec3cae5ff",
"metadata": {},
"source": [
"### Table.round()\n",
"\n",
"```\n",
"Table.round(self, decimals: Union[int, Dict[str, int]] = 0)\n",
"```\n",
"\n",
"Round a Table to a variable number of decimal places.\n",
"\n",
"**Parameters:**\n",
"\n",
"| Name | Type | Description | Default |\n",
"| :--------------: | :-----------------: | :------------------------------------------------------------ | :-----: |\n",
"| decimals | int or Dict | Number of decimal places to round each column to. If an int is given, round each real or float column to the same number of places. If a dict is given, its keys should be column names and its values the number of decimal places to round the corresponding column to. Any columns not included in decimals will be left as is. Elements of decimals which are not columns of the input will be ignored. | 0 |\n",
"\n",
"**Returns:**\n",
"\n",
"| Type | Description |\n",
"| :--------: | :--------------------------------------------------------------------------------------- |\n",
"| Table | A Table with the affected columns rounded to the specified number of decimal places. |"
]
},
{
"cell_type": "markdown",
"id": "1b629def",
"metadata": {},
"source": [
"If an integer is provided, every float column is rounded to that number of decimal places."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "08c182c9",
"metadata": {},
"outputs": [],
"source": [
"tab.round(1)"
]
},
{
"cell_type": "markdown",
"id": "28853fc0",
"metadata": {},
"source": [
"If a dict is provided whose keys are column names and whose values are the number of decimal places to round each column to, the columns are rounded accordingly.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7640df4c",
"metadata": {},
"outputs": [],
"source": [
"tab.round({\"price\": 1, \"traded\": 0})"
]
},
{
"cell_type": "markdown",
"id": "cbcdf84e",
"metadata": {},
"source": [
"Cast all columns to dtype LongVector"
Expand Down
47 changes: 47 additions & 0 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,28 @@ def inner(*args, **kwargs):
'': b'kx.List'}


# Define the mapping between the returns of kx.*Vector.t and the associated typechar
_typenum_to_typechar_mapping = {0: '',
1: 'b',
2: 'g',
4: 'x',
5: 'h',
6: 'i',
7: 'j',
8: 'e',
9: 'f',
10: 'c',
11: 's',
12: 'p',
14: 'd',
15: 'z',
16: 'n',
17: 'u',
18: 'v',
19: 't',
13: 'm'}


class PandasMeta:
# Dataframe properties
@property
Expand Down Expand Up @@ -243,6 +265,31 @@ def abs(self, numeric_only=False):
tab = _get_numeric_only_subtable(self)
return q.abs(tab)

@api_return
def round(self, decimals=0):
    """Round the numeric columns of a table to a number of decimal places.

    Parameters:
        decimals: Either an integer (Python ``int`` or q long atom), applied
            to every numeric column, or a dictionary (Python ``dict`` or q
            dictionary) mapping column names to the number of decimal places
            for that column. Numeric columns that are not named in the
            dictionary are left as is, and dictionary keys that are not
            numeric columns of the table are ignored.

    Returns:
        The table with the selected columns rounded and cast back to their
        original q type. A keyed table is passed through ``q.value`` before
        rounding, and the result is built from that value table.

    Raises:
        TypeError: If ``decimals`` is neither an integer nor a dictionary
            (a keyed table is rejected even though its q type is 99h).
    """
    tab = self
    if 'Keyed' in str(type(tab)):
        tab = q.value(tab)

    affected_cols = _get_numeric_only_subtable(tab).columns.py()
    type_dict = {col: _typenum_to_typechar_mapping[tab[col].t] for col in affected_cols}

    # Casts a rounded value `x` back to the column's type character `y`.
    cast_back = q('{string[y][0]$x}')

    if isinstance(decimals, int) or q("{-7h~type x}", decimals):
        dec_dict = {col: decimals for col in affected_cols}
    elif isinstance(decimals, dict) or (q("{99h~type x}", decimals) and
                                        'Keyed' not in str(type(decimals))):
        # Work on a plain Python mapping so membership can be tested: only
        # columns that are both numeric and named in `decimals` are rounded.
        # Indexing every numeric column unconditionally raised KeyError for
        # any column the caller left out.
        requested = decimals if isinstance(decimals, dict) else decimals.py()
        dec_dict = {col: requested[col] for col in affected_cols if col in requested}
    else:
        raise TypeError('Parameter "decimals" should be integer or dictionary.')

    if not dec_dict:
        # Nothing to round: hand the table back without issuing an empty update.
        return tab

    rounded = {col: [cast_back(round(elem, dec_dict[col]), type_dict[col])
                     for elem in tab[col]]
               for col in dec_dict}

    return q.qsql.update(tab, columns=rounded)

@convert_result
def all(self, axis=0, bool_only=False, skipna=True):
res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only)
Expand Down
60 changes: 60 additions & 0 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1840,6 +1840,66 @@ def test_pandas_abs(kx, q):
tab.abs()


def test_pandas_round(kx, q):
    """Check that rounding a q table matches ``pandas.DataFrame.round``.

    Covers the default call, integer arguments (zero, positive, negative),
    per-column dictionaries given as a Python dict and as a q dictionary
    (including keys for a non-numeric and a non-existent column), dtype
    preservation, and the TypeError raised for a float or keyed-table argument.
    """
    table = q('([]c1:4 5 10 15 20 25h;'
              'c2:4 5 10 15 20 25i;'
              'c3:4 5 10 15 20 25j;'
              'c4:0 0.10 0.25 0.30 0.45 0.50e;'
              'c5:0 0.10 0.25 0.30 0.45 0.50f;'
              'c6:`a`b`c`d`e`f)')
    frame = table.pd()
    frames_match = pd.testing.assert_frame_equal

    # Default and explicit zero decimals.
    frames_match(frame.round(), table.round().pd())
    frames_match(table.round(0).pd(), table.round().pd())

    # Positive and negative integer decimals.
    for places in (2, -1):
        frames_match(frame.round(places), table.round(places).pd())

    # Per-column decimals; 'c6' is a symbol column and 'c7' does not exist.
    per_column = {'c1': -2,
                  'c2': -1,
                  'c3': -0,
                  'c4': 1,
                  'c5': 2,
                  'c6': 3,
                  'c7': 4}

    for as_q_dictionary in (False, True):
        argument = kx.toq(per_column) if as_q_dictionary else per_column
        q_result = table.round(argument)
        frames_match(frame.round(per_column), q_result.pd())
        frames_match(table.dtypes.pd(), q_result.dtypes.pd())

    # A float is not an accepted `decimals` value.
    with pytest.raises(TypeError):
        table.round(.1)

    # Neither is a keyed table.
    timestamps = [
        pd.Timestamp("2016-05-25 13:30:00.023"),
        pd.Timestamp("2016-05-25 13:30:00.038"),
        pd.Timestamp("2016-05-25 13:30:00.048"),
        pd.Timestamp("2016-05-25 13:30:00.048"),
        pd.Timestamp("2016-05-25 13:30:00.048")
    ]
    unkeyed = pd.DataFrame({
        "time": timestamps,
        "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
        "price": [51.95, 51.95, 720.77, 720.92, 98.0],
        "quantity": [75, 155, 100, 100, 100]
    })
    keyed = q('1!', kx.toq(unkeyed))
    with pytest.raises(TypeError):
        table.round(keyed)


def test_pandas_min(q):
tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
df = tab.pd()
Expand Down
Loading