Skip to content

Commit

Permalink
fix: ak.values_astype now turns 'unknown' type into the requested type. (#2196)
Browse files Browse the repository at this point in the history

* fix: ak.values_astype now turns 'unknown' type into the requested type.

* Added 'including_unknown' argument.

* @agoose77's correction to the return type annotation.

Co-authored-by: Angus Hollands <[email protected]>

---------

Co-authored-by: Angus Hollands <[email protected]>
  • Loading branch information
jpivarski and agoose77 authored Feb 4, 2023
1 parent e350d63 commit 20b3da8
Show file tree
Hide file tree
Showing 17 changed files with 289 additions and 231 deletions.
4 changes: 2 additions & 2 deletions src/awkward/_do.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ def flatten(layout: Content, axis: int = 1) -> Content:
return flattened


def numbers_to_type(layout: Content, name: str) -> Content:
return layout._numbers_to_type(name)
def numbers_to_type(layout: Content, name: str, including_unknown: bool) -> Content:
return layout._numbers_to_type(name, including_unknown)


def fill_none(layout: Content, value: Content) -> Content:
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@ def _fill_none(self, value: Content) -> Content:
def _local_index(self, axis, depth):
return self.to_ByteMaskedArray()._local_index(axis, depth)

def _numbers_to_type(self, name):
return self.to_ByteMaskedArray()._numbers_to_type(name)
def _numbers_to_type(self, name, including_unknown):
return self.to_ByteMaskedArray()._numbers_to_type(name, including_unknown)

def _is_unique(self, negaxis, starts, parents, outlength):
if self._mask.length == 0:
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,10 +699,10 @@ def _local_index(self, axis, depth):
outindex, out, parameters=self._parameters
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.ByteMaskedArray(
self._mask,
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
self._valid_when,
parameters=self._parameters,
)
Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,7 @@ def _is_equal_to(self, other: Self, index_dtype: bool, numpyarray: bool) -> bool
def _repr(self, indent: str, pre: str, post: str) -> str:
raise ak._errors.wrap_error(NotImplementedError)

def _numbers_to_type(self, name: str) -> Self:
def _numbers_to_type(self, name: str, including_unknown: bool) -> Content:
raise ak._errors.wrap_error(NotImplementedError)

def _fill_none(self, value: Content) -> Content:
Expand Down
9 changes: 5 additions & 4 deletions src/awkward/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,11 @@ def _local_index(self, axis, depth):
np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})")
)

def _numbers_to_type(self, name):
return ak.contents.EmptyArray(
parameters=self._parameters, backend=self._backend
)
def _numbers_to_type(self, name, including_unknown):
if including_unknown:
return self.to_NumpyArray(ak.types.numpytype.primitive_to_dtype(name))
else:
return self

def _is_unique(self, negaxis, starts, parents, outlength):
return True
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,10 +695,10 @@ def _unique_index(self, index, sorted=True):

return next[0 : length[0]]

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.IndexedArray(
self._index,
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
parameters=self._parameters,
)

Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,10 +864,10 @@ def _is_subrange_equal(self, starts, stops, length, sorted=True):
nextstarts, nextstops, nextstarts.length, False
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.IndexedOptionArray(
self._index,
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
parameters=self._parameters,
)

Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,11 +1157,11 @@ def _local_index(self, axis, depth):
self._content._local_index(axis, depth + 1),
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.ListArray(
self._starts,
self._stops,
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
parameters=self._parameters,
)

Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,10 +780,10 @@ def _local_index(self, axis, depth):
self._offsets, self._content._local_index(axis, depth + 1)
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.ListOffsetArray(
self._offsets,
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
parameters=self._parameters,
)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ def _as_unique_strings(self, offsets):

return out2, nextoffsets[: outlength[0]]

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
if (
self.parameter("__array__") == "string"
or self.parameter("__array__") == "bytestring"
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,10 +725,10 @@ def _local_index(self, axis, depth):
backend=self._backend,
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
contents = []
for x in self._contents:
contents.append(x._numbers_to_type(name))
contents.append(x._numbers_to_type(name, including_unknown))
return ak.contents.RecordArray(
contents,
self._fields,
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,9 +728,9 @@ def _local_index(self, axis, depth):
self._content._local_index(axis, depth + 1), self._size, self._length
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.RegularArray(
self._content._numbers_to_type(name),
self._content._numbers_to_type(name, including_unknown),
self._size,
self._length,
parameters=self._parameters,
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,10 +1174,10 @@ def _combinations(self, n, replacement, recordlookup, parameters, axis, depth):
parameters=self._parameters,
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
contents = []
for x in self._contents:
contents.append(x._numbers_to_type(name))
contents.append(x._numbers_to_type(name, including_unknown))
return ak.contents.UnionArray(
self._tags,
self._index,
Expand Down
5 changes: 3 additions & 2 deletions src/awkward/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,10 @@ def _local_index(self, axis, depth):
self._content._local_index(axis, depth), parameters=self._parameters
)

def _numbers_to_type(self, name):
def _numbers_to_type(self, name, including_unknown):
return ak.contents.UnmaskedArray(
self._content._numbers_to_type(name), parameters=self._parameters
self._content._numbers_to_type(name, including_unknown),
parameters=self._parameters,
)

def _is_unique(self, negaxis, starts, parents, outlength):
Expand Down
19 changes: 14 additions & 5 deletions src/awkward/operations/ak_values_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
np = NumpyMetadata.instance()


def values_astype(array, to, *, highlevel=True, behavior=None):
def values_astype(array, to, *, including_unknown=False, highlevel=True, behavior=None):
"""
Args:
array: Array-like data (anything #ak.to_layout recognizes).
to (dtype or dtype specifier): Type to convert the numbers into.
including_unknown (bool): If True, the `unknown` type is considered
a value type and is converted to the specified dtype; if False,
`unknown` will remain `unknown`.
highlevel (bool): If True, return an #ak.Array; otherwise, return
a low-level #ak.contents.Content subclass.
behavior (None or dict): Custom #ak.behavior for the output array, if
Expand Down Expand Up @@ -50,14 +53,20 @@ def values_astype(array, to, *, highlevel=True, behavior=None):
"""
with ak._errors.OperationErrorContext(
"ak.values_astype",
{"array": array, "to": to, "highlevel": highlevel, "behavior": behavior},
{
"array": array,
"to": to,
"including_unknown": including_unknown,
"highlevel": highlevel,
"behavior": behavior,
},
):
return _impl(array, to, highlevel, behavior)
return _impl(array, to, including_unknown, highlevel, behavior)


def _impl(array, to, highlevel, behavior):
def _impl(array, to, including_unknown, highlevel, behavior):
to_dtype = np.dtype(to)
to_str = ak.types.numpytype.dtype_to_primitive(to_dtype)
layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)
out = ak._do.numbers_to_type(layout, to_str)
out = ak._do.numbers_to_type(layout, to_str, including_unknown)
return ak._util.wrap(out, behavior, highlevel, like=array)
Loading

0 comments on commit 20b3da8

Please sign in to comment.