Skip to content

Commit

Permalink
Fix Numpy <---> TensorProto Conversion (#60)
Browse files Browse the repository at this point in the history
* Fixed conversion for scalar types, both from and to TensorProto
* Fixed conversion from nested python lists
* Fixed conversion for DT_STRING, DT_HALF, DT_COMPLEX64 and DT_COMPLEX128
* Added tests to test Numpy to TensorProto conversion
* Fix tests
* Bumped version from 2.3.1 to 2.3.2
  • Loading branch information
Valenzione authored Jun 29, 2020
1 parent b8417c0 commit f27c66a
Show file tree
Hide file tree
Showing 7 changed files with 320 additions and 134 deletions.
14 changes: 7 additions & 7 deletions hydrosdk/contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from hydro_serving_grpc.contract import ModelContract, ModelSignature, ModelField, DataProfileType
from hydro_serving_grpc.tf.types_pb2 import *

from hydrosdk.data.types import name2dtype, shape_to_proto, PY_TO_DTYPE, np2proto_dtype, proto2np_dtype
from hydrosdk.data.types import alias_to_proto_dtype, shape_to_proto, PY_TO_DTYPE, np_to_proto_dtype, proto_to_np_dtype


class ContractViolationException(Exception):
Expand Down Expand Up @@ -74,7 +74,7 @@ def field_from_dict(field_name: str, field_dict: dict) -> ModelField:
subfields_buffer.append(subfield)
result_subfields = subfields_buffer
else:
result_dtype = name2dtype(dtype)
result_dtype = alias_to_proto_dtype(dtype)

if result_dtype is not None:
result_field = ModelField(
Expand Down Expand Up @@ -374,14 +374,14 @@ def parse_field(name, dtype, shape, profile=ProfilingType.NONE):
else:
if dtype in DataType.keys(): # exact name e.g. DT_STRING
result_dtype = dtype
elif dtype in DataType.values():
elif dtype in DataType.values(): # int value of DataType
result_dtype = dtype
elif isinstance(dtype, str): # string alias
result_dtype = name2dtype(dtype)
elif isinstance(dtype, str): # string alias e.g. 'double'
result_dtype = alias_to_proto_dtype(dtype)
elif isinstance(dtype, type): # type. could be python or numpy type
result_dtype = PY_TO_DTYPE.get(dtype)
if not result_dtype:
result_dtype = np2proto_dtype(dtype)
result_dtype = np_to_proto_dtype(dtype)
else:
result_dtype = DT_INVALID

Expand Down Expand Up @@ -590,7 +590,7 @@ def mock_input_data(signature: ModelSignature):
simple_shape = [1]
field_shape = tuple(np.abs(simple_shape))
size = reduce(operator.mul, field_shape)
npdtype = proto2np_dtype(field.dtype)
npdtype = proto_to_np_dtype(field.dtype)
if field.dtype == DT_BOOL:
x = (np.random.randn(*field_shape) >= 0).astype(np.bool)
elif field.dtype in [DT_FLOAT, DT_HALF, DT_DOUBLE, DT_COMPLEX128, DT_COMPLEX64]:
Expand Down
165 changes: 124 additions & 41 deletions hydrosdk/data/conversions.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from typing import Union, Dict, List
from typing import Dict, List, Iterable

import numpy as np
import pandas as pd
from hydro_serving_grpc import TensorProto, DataType, TensorShapeProto
from hydro_serving_grpc import TensorProto, DataType, TensorShapeProto, DT_STRING, DT_HALF, DT_COMPLEX64, DT_COMPLEX128
from hydro_serving_grpc.contract import ModelSignature
from pandas.core.common import flatten

from hydrosdk.data.types import NP_TO_HS_DTYPE, DTYPE_TO_FIELDNAME, np2proto_shape, PY_TO_DTYPE, find_in_list_by_name, proto2np_dtype
from hydrosdk.data.types import np_to_proto_dtype, DTYPE_TO_FIELDNAME, find_in_list_by_name, proto_to_np_dtype


def tensor_proto_to_py(t: TensorProto):
"""
Converts tensor proto into corresponding python object
Converts tensor proto into a corresponding python object - list or scalar
:param t:
:return:
"""
Expand All @@ -25,74 +26,157 @@ def tensor_proto_to_py(t: TensorProto):
return value[0]


def tensor_proto_to_nparray(t: TensorProto):
def list_to_tensor_proto(data: List, proto_dtype: DataType, proto_shape: TensorShapeProto) -> TensorProto:
"""
Creates Numpy array given dtype, shape and values from TensorProto object
Converts data in a form of a Python List into a TensorProto object
:param data: List with data
:param proto_dtype: DataType of a future TensorProto
:param proto_shape: TensorShapeProto of a future TensorProto
:return: Same data but in a TensorProto object
"""
# We can pack only flattened lists into TensorProto, so we need to flatten the list
flattened_list = flatten(data)
tensor_proto_parameters = {
DTYPE_TO_FIELDNAME[proto_dtype]: flattened_list,
"dtype": proto_dtype,
"tensor_shape": proto_shape
}
return TensorProto(**tensor_proto_parameters)


def tensor_proto_to_np(t: TensorProto):
"""
Creates either np.array or scalar with Numpy dtype based on
data type, shape and values from TensorProto object
:param t:
:return:
"""
array_shape = [dim.size for dim in t.tensor_shape.dim]
np_dtype = proto2np_dtype(t.dtype)
value = getattr(t, DTYPE_TO_FIELDNAME[t.dtype])

nparray = np.array(value, dtype=np_dtype)
np_dtype = proto_to_np_dtype(t.dtype)
proto_values = getattr(t, DTYPE_TO_FIELDNAME[t.dtype])

if t.dtype == DT_HALF:
x = np.fromiter(proto_values, dtype=np.uint16).view(np.float16)
elif t.dtype == DT_STRING:
x = np.array([s.decode("utf-8") for s in proto_values])
elif t.dtype == DT_COMPLEX64 or t.dtype == DT_COMPLEX128:
it = iter(proto_values)
x = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=np_dtype)
else:
x = np.array(proto_values, dtype=np_dtype)

# If no dims specified in TensorShapeProto, then it is scalar
if array_shape:
return nparray.reshape(*array_shape)
return x.reshape(*array_shape)
else:
return np.asscalar(nparray)
return x.flatten()[0]


def nparray_to_tensor_proto(x: np.array):
def np_to_tensor_proto(x) -> TensorProto:
"""
Creates TensorProto object with specified dtype, shape and values under respective fieldname from np.array
:param x:
Creates TensorProto object from Numpy ndarray or scalar with inferred TensorShapeProto and DataType
:param x: Union[np.array, np.ScalarType]
:return:
"""
proto_dtype = NP_TO_HS_DTYPE.get(x.dtype.type)
if proto_dtype is None:
raise ValueError(f"Couldn't convert numpy dtype {x.dtype.type} to one of available TensorProto dtypes")
if isinstance(x, np.ScalarType):
return scalar_to_tensor_proto(x)
elif isinstance(x, np.ndarray):
return nparray_to_tensor_proto(x)
else:
raise TypeError(f"Unsupported object {x}")


def nparray_to_tensor_proto(x: np.array) -> TensorProto:
"""
Creates TensorProto object from Numpy ndarray
with TensorShapeProto and DataType inferred from the latter
:param x: Data in the form of a numpy ndarray
:return: Same data packed into a TensorProto object
"""

if x.dtype.isbuiltin != 1 and x.dtype.type != np.str_:
raise ValueError(f"{x.dtype} is not supported."
f" Dtypes not compiled into numpy are not supported, except for np.str.")

proto_dtype = np_to_proto_dtype(x.dtype.type)
proto_shape = tensor_shape_proto_from_tuple(x.shape)

if proto_dtype == DT_HALF:
proto_values = x.view(np.uint16).flatten()
elif proto_dtype == DT_STRING:
proto_values = [s.encode("utf-8") for s in x.flatten()]
elif proto_dtype == DT_COMPLEX64 or proto_dtype == DT_COMPLEX128:
proto_values = [v.item() for c_number in x.flatten() for v in [c_number.real, c_number.imag]]
else:
proto_values = x.flatten()

kwargs = {
DTYPE_TO_FIELDNAME[proto_dtype]: x.flatten(),
DTYPE_TO_FIELDNAME[proto_dtype]: proto_values,
"dtype": proto_dtype,
"tensor_shape": np2proto_shape(x.shape)
"tensor_shape": proto_shape
}

return TensorProto(**kwargs)


def list_to_tensor_proto(data: List, dtype: str, shape: TensorShapeProto):
proto_dtype = DataType.Value(DataType.Name(dtype))
tensor_proto_parameters = {
DTYPE_TO_FIELDNAME[proto_dtype]: data,
def scalar_to_tensor_proto(x: np.ScalarType) -> TensorProto:
"""
Creates TensorProto object from a scalar with a Numpy dtype
with TensorShapeProto and DataType inferred from the latter
:param x: Scalar value with a Numpy dtype
:return: Same value but packed into a TensorProto object
"""
proto_dtype = np_to_proto_dtype(type(x))

if proto_dtype == DT_HALF:
proto_values = [np.array(x, dtype=np.float16).view(np.uint16)]
elif proto_dtype == DT_STRING:
proto_values = [x.encode("utf-8")]
elif proto_dtype == DT_COMPLEX64 or proto_dtype == DT_COMPLEX128:
proto_values = [x.real, x.imag]
else:
proto_values = [x]

kwargs = {
DTYPE_TO_FIELDNAME[proto_dtype]: proto_values,
"dtype": proto_dtype,
"tensor_shape": shape
"tensor_shape": TensorShapeProto()
}
return TensorProto(**tensor_proto_parameters)
return TensorProto(**kwargs)


def convert_inputs_to_tensor_proto(inputs: Union[Dict, pd.DataFrame], signature: ModelSignature) -> dict:
def tensor_shape_proto_from_tuple(shape: Iterable[int]) -> TensorShapeProto:
"""
Helper function to transform shape in the form of a tuple (Numpy shape representation) into a TensorShapeProto
:param shape: Shape in a tuple form
:return: same shape but in a TensorShapeProto object
"""
return TensorShapeProto(dim=[TensorShapeProto.Dim(size=s) for s in shape])

:param inputs:
:param signature:
:return:

def convert_inputs_to_tensor_proto(inputs: Dict, signature: ModelSignature) -> Dict[str, TensorProto]:
"""
Generate Dict[str, TensorProto] from pd.DataFrame or Dict[str, Union[np.array, np.ScalarType]]
Converts inputs into a representation of data where each field
of a signature is represented by a valid TensorProto object.
:param inputs: Dict, where keys are names of signature fields and
values are data in either Numpy or Python form, or alternatively,
pd.DataFrame, where columns are names of fields and column values are data.
:param signature: ModelVersion signature with names, shapes and dtypes
of fields into which `inputs` are converted
:return: Dictionary with TensorProtos to be used in forming a PredictRequest
"""
tensors = {}
if isinstance(inputs, dict):
for key, value in inputs.items():

if type(value) in PY_TO_DTYPE:
# If we got a single val, we can perform the same logic in the next steps if we create List[value] from it
value = [value]

if isinstance(value, list): # x: [1,2,3,4]
signature_field = find_in_list_by_name(some_list=signature.inputs, name=key)
tensors[key] = list_to_tensor_proto(value, signature_field.dtype, signature_field.shape)

elif isinstance(value, np.ndarray) or isinstance(value, np.ScalarType):
# Support both np.ndarray and np.scalar since they support same operations on them
elif isinstance(value, np.ScalarType):
# This works for all scalars, including python int, float, etc.
tensors[key] = scalar_to_tensor_proto(value)
elif isinstance(value, np.ndarray):
tensors[key] = nparray_to_tensor_proto(value)
else:
raise TypeError("Unsupported objects in dict values {}".format(type(value)))
Expand All @@ -101,8 +185,7 @@ def convert_inputs_to_tensor_proto(inputs: Union[Dict, pd.DataFrame], signature:
for key, value in dict(inputs).items():
tensors[key] = nparray_to_tensor_proto(value.ravel())
else:
raise ValueError(
"Conversion failed. Expected [pandas.DataFrame, dict[str, numpy.ndarray], dict[str, list], dict[str, python_primitive]], got {}".format(
type(inputs)))
raise ValueError(f"Conversion failed. Expected [pandas.DataFrame, dict[str, numpy.ndarray],\
dict[str, list], dict[str, np.ScalarType]], got {type(inputs)}")

return tensors
Loading

0 comments on commit f27c66a

Please sign in to comment.