Skip to content

Commit

Permalink
Fix handling of np.int32 and np.float32 in df_to_table function (#13)
Browse files Browse the repository at this point in the history
Due to incorrect handling of np.int32 and np.float32, the resulting type
falls back to the string type in `df_to_table`.
  • Loading branch information
aria authored Oct 7, 2022
1 parent 82f86cf commit 8882f9b
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 15 deletions.
8 changes: 4 additions & 4 deletions gen/python/grpc/caraml/upi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def df_to_table(df: pd.DataFrame, table_name: str) -> table_pb2.Table:
continue

dtype = df_dtypes[i - 1]
if dtype == np.float64:
if dtype == np.float64 or dtype == np.float32:
if isnan(value):
values.append(table_pb2.Value(is_null=True))
continue

values.append(
table_pb2.Value(double_value=float(value)))
elif dtype == np.int64:
elif dtype == np.int64 or dtype == np.int32:
values.append(
table_pb2.Value(integer_value=int(value)))
else:
Expand Down Expand Up @@ -124,9 +124,9 @@ def dtype_to_upi_type(dtype):
Returns: upi type
"""
if dtype == np.int64:
if dtype == np.int64 or dtype == np.int32:
return type_pb2.TYPE_INTEGER
if dtype == np.float64:
if dtype == np.float64 or dtype == np.float32:
return type_pb2.TYPE_DOUBLE
# any other type will be treated as string
return type_pb2.TYPE_STRING
58 changes: 47 additions & 11 deletions gen/python/grpc/test/utils_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest
import pandas as pd
from caraml.upi.utils import df_to_table, table_to_df
Expand All @@ -11,7 +12,18 @@
columns=[table_pb2.Column(name="int_col", type=type_pb2.TYPE_INTEGER)],
rows=[table_pb2.Row(row_id="0",
values=[
table_pb2.Value(integer_value=111)])])
table_pb2.Value(integer_value=111)])]),
False,
),
(
"int_table_np_int32",
pd.DataFrame(data=[111], columns=["int_col"], index=["0"]).astype(np.int32),
table_pb2.Table(name="int_table_np_int32",
columns=[table_pb2.Column(name="int_col", type=type_pb2.TYPE_INTEGER)],
rows=[table_pb2.Row(row_id="0",
values=[
table_pb2.Value(integer_value=111)])]),
True,
),
(
"float_table",
Expand All @@ -21,7 +33,8 @@
rows=[table_pb2.Row(row_id="0",
values=[
table_pb2.Value(
double_value=111.11)])])
double_value=111.11)])]),
False,
),
(
"string_table",
Expand All @@ -31,7 +44,8 @@
rows=[table_pb2.Row(row_id="0",
values=[
table_pb2.Value(
string_value="111.11")])])
string_value="111.11")])]),
False,
),
# pandas will convert int column containing null to double
(
Expand All @@ -44,7 +58,8 @@
table_pb2.Value(double_value=111)]),
table_pb2.Row(row_id="1",
values=[table_pb2.Value(is_null=True)]),
])
]),
False,
),
(
"float_table_with_null",
Expand All @@ -57,7 +72,22 @@
double_value=111.11)]),
table_pb2.Row(row_id="1",
values=[table_pb2.Value(is_null=True)]),
])
]),
False,
),
(
"float_table_with_null_np_float32",
pd.DataFrame(data=[[111], [None]], columns=["float_col"], index=["0", "1"]).astype(np.float32),
table_pb2.Table(name="float_table_with_null_np_float32",
columns=[table_pb2.Column(name="float_col", type=type_pb2.TYPE_DOUBLE)],
rows=[table_pb2.Row(row_id="0",
values=[
table_pb2.Value(
double_value=111)]),
table_pb2.Row(row_id="1",
values=[table_pb2.Value(is_null=True)]),
]),
True,
),
(
"string_table_with_null",
Expand All @@ -70,7 +100,8 @@
string_value="111.11")]),
table_pb2.Row(row_id="1",
values=[table_pb2.Value(is_null=True)]),
])
]),
False,
),
(
"table_with_custom_index",
Expand All @@ -85,19 +116,24 @@
values=[
table_pb2.Value(
string_value="222.22")]),
])
]),
False,
),
]


@pytest.mark.parametrize("name,df,exp", conversion_test_cases)
def test_df_to_table(name, df, exp):
@pytest.mark.parametrize("name,df,exp,cast", conversion_test_cases)
def test_df_to_table(name, df, exp, cast):
table = df_to_table(df, name)
assert table == exp


@pytest.mark.parametrize("exp_name,exp_df,table", conversion_test_cases)
def test_table_to_df(exp_name, exp_df, table):
@pytest.mark.parametrize("exp_name,exp_df,table,cast", conversion_test_cases)
def test_table_to_df(exp_name, exp_df, table, cast):
if cast:
# skip tests that involve casting
return

df, name = table_to_df(table)
assert exp_df.equals(df)
assert name == exp_name

0 comments on commit 8882f9b

Please sign in to comment.