From 85a9e0829c8792f617d9be44d1764a5bf985c973 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Sat, 22 Jun 2024 18:18:46 -0700 Subject: [PATCH 1/3] special handling for object arrays --- src/_arraykit.c | 18 +++++++++++++++++- test/test_util.py | 5 +++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index db4da764..bf4e5fcd 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3573,7 +3573,7 @@ array_to_tuple_array(PyObject *Py_UNUSED(m), PyObject *a) i++; } } - else { // ndim == 1 + else if (PyArray_TYPE(input_array) != NPY_OBJECT) { // ndim == 1, not object while (p < p_end) { tuple = PyTuple_New(1); if (tuple == NULL) { @@ -3590,6 +3590,22 @@ array_to_tuple_array(PyObject *Py_UNUSED(m), PyObject *a) i++; } } + else { // ndim == 1, object + while (p < p_end) { + tuple = PyTuple_New(1); + if (tuple == NULL) { + goto error; + } + // scalar returned in is native PyObject from object arrays + item = *(PyObject**)PyArray_GETPTR1(input_array, i); + Py_INCREF(item); + // TODO: identify tuple + PyTuple_SET_ITEM(tuple, 0, item); // steals reference to item + *p++ = tuple; // assign with new ref, no incr needed + i++; + } + } + PyArray_CLEARFLAGS((PyArrayObject *)output, NPY_ARRAY_WRITEABLE); return output; error: diff --git a/test/test_util.py b/test/test_util.py index 0b651430..58092ea5 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -303,8 +303,9 @@ def test_array2d_to_array1d_1d_c(self) -> None: def test_array2d_to_array1d_1d_d(self) -> None: a1 = np.array([('a', 10), ('b', 30), ('c', 5)], dtype=object) - a2 = array_to_tuple_array(a1) - self.assertEqual(a2.tolist(), [('a', 10), ('b', 30), ('c', 5)]) + a2 = array_to_tuple_array(a1) # from 2d + a3 = array_to_tuple_array(a2) # from 1d + self.assertEqual(a3.tolist(), [('a', 10), ('b', 30), ('c', 5)]) def test_array2d_to_array1d_1d_e(self) -> None: a1 = np.array([True, False, True], dtype=object) From d44be7d8201cff8b514c2fa4365aeb65a9ddc521 Mon Sep 17 
00:00:00 2001 From: Christopher Ariza Date: Sun, 23 Jun 2024 07:40:21 -0700 Subject: [PATCH 2/3] implement handling for tuples in object dtypes --- src/_arraykit.c | 41 +++++++++++++++++++++++++++++------------ test/test_util.py | 31 ++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index bf4e5fcd..fd55c7ec 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3592,20 +3592,22 @@ array_to_tuple_array(PyObject *Py_UNUSED(m), PyObject *a) } else { // ndim == 1, object while (p < p_end) { - tuple = PyTuple_New(1); - if (tuple == NULL) { - goto error; - } - // scalar returned in is native PyObject from object arrays item = *(PyObject**)PyArray_GETPTR1(input_array, i); - Py_INCREF(item); - // TODO: identify tuple - PyTuple_SET_ITEM(tuple, 0, item); // steals reference to item + Py_INCREF(item); // always incref + if (PyTuple_Check(item)) { + tuple = item; // do not double pack + } + else { + tuple = PyTuple_New(1); + if (tuple == NULL) { + goto error; + } + PyTuple_SET_ITEM(tuple, 0, item); // steals reference to item + } *p++ = tuple; // assign with new ref, no incr needed i++; } } - PyArray_CLEARFLAGS((PyArrayObject *)output, NPY_ARRAY_WRITEABLE); return output; error: @@ -3680,10 +3682,10 @@ ATT_iternext(ATTObject *self) { Py_DECREF(tuple); return NULL; } - PyTuple_SET_ITEM(tuple, j, item); // steals reference to item + PyTuple_SET_ITEM(tuple, j, item); // steals ref } } - else { // ndim == 1 + else if (PyArray_TYPE(array) != NPY_OBJECT) { // ndim == 1, not object tuple = PyTuple_New(1); if (tuple == NULL) { return NULL; @@ -3693,7 +3695,22 @@ ATT_iternext(ATTObject *self) { Py_DECREF(tuple); return NULL; } - PyTuple_SET_ITEM(tuple, 0, item); // steals reference to item + PyTuple_SET_ITEM(tuple, 0, item); // steals ref + } + else { // ndim == 1, object + item = *(PyObject**)PyArray_GETPTR1(array, i); + Py_INCREF(item); // always incref + if (PyTuple_Check(item)) { + tuple = item; // do 
not double pack + } + else { + tuple = PyTuple_New(1); + if (tuple == NULL) { + Py_DECREF(item); + return NULL; + } + PyTuple_SET_ITEM(tuple, 0, item); // steals ref + } } self->pos++; return tuple; diff --git a/test/test_util.py b/test/test_util.py index 58092ea5..93b819ec 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -286,34 +286,47 @@ def test_array_deepcopy_h(self) -> None: a2 = array_deepcopy(a1, ()) #--------------------------------------------------------------------------- - def test_array2d_to_array1d_1d_a(self) -> None: + def test_array_to_tuple_array_1d_a(self) -> None: a1 = np.arange(10) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,)]) - def test_array2d_to_array1d_1d_b(self) -> None: + def test_array_to_tuple_array_1d_b(self) -> None: a1 = np.array(['aaa', 'b', 'ccc']) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [('aaa',), ('b',), ('ccc',)]) - def test_array2d_to_array1d_1d_c(self) -> None: + def test_array_to_tuple_array_1d_c(self) -> None: a1 = np.array([None, 'b', 30]) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [(None,), ('b',), (30,)]) - def test_array2d_to_array1d_1d_d(self) -> None: + def test_array_to_tuple_array_1d_d(self) -> None: a1 = np.array([('a', 10), ('b', 30), ('c', 5)], dtype=object) a2 = array_to_tuple_array(a1) # from 2d + self.assertEqual(a2.tolist(), [('a', 10), ('b', 30), ('c', 5)]) a3 = array_to_tuple_array(a2) # from 1d self.assertEqual(a3.tolist(), [('a', 10), ('b', 30), ('c', 5)]) - def test_array2d_to_array1d_1d_e(self) -> None: + def test_array_to_tuple_array_1d_e(self) -> None: a1 = np.array([True, False, True], dtype=object) a2 = array_to_tuple_array(a1) self.assertIs(a2[0][0].__class__, bool) self.assertEqual(a2.tolist(), [(True,), (False,), (True,)]) - def test_array2d_to_array1d_b(self) -> None: + def test_array_to_tuple_array_1d_f(self) -> None: + a1 = np.array([None, None, None], dtype=object) + a1[0] 
= 3 + a1[1] = ('a', 30) + a1[2] = (None, True, 90000000) + + a2 = array_to_tuple_array(a1) + self.assertEqual(a2.tolist(), [(3,), ('a', 30), (None, True, 90000000)]) + + a3 = array_to_tuple_array(a2) + self.assertEqual(a3.tolist(), [(3,), ('a', 30), (None, True, 90000000)]) + + def test_array_to_tuple_array_b(self) -> None: a1 = np.arange(10, dtype=np.int64).reshape(5, 2) result = array_to_tuple_array(a1) assert isinstance(result[0], tuple) @@ -323,18 +336,18 @@ def test_array2d_to_array1d_b(self) -> None: self.assertEqual(tuple(result), ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9))) - def test_array2d_to_array1d_c(self) -> None: + def test_array_to_tuple_array_c(self) -> None: a1 = np.array([["a", "b"], ["ccc", "ddd"], ["ee", "ff"]]) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [('a', 'b'), ('ccc', 'ddd'), ('ee', 'ff')]) - def test_array2d_to_array1d_d(self) -> None: + def test_array_to_tuple_array_d(self) -> None: a1 = np.array([[3, 5], [10, 20], [7, 2]], dtype=np.uint8) a2 = array_to_tuple_array(a1) self.assertEqual(a2.tolist(), [(3, 5), (10, 20), (7, 2)]) self.assertIs(type(a2[0][0]), np.uint8) - def test_array2d_to_array1d_e(self) -> None: + def test_array_to_tuple_array_e(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(4, 5) result = array_to_tuple_array(a1) self.assertEqual(result.tolist(), [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9), (10, 11, 12, 13, 14), (15, 16, 17, 18, 19)]) From f76e22e73447844a5ec2db8cc252f4a22efaaa48 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Sun, 23 Jun 2024 07:45:35 -0700 Subject: [PATCH 3/3] additional tests --- test/test_util.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 93b819ec..9e00f142 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -353,18 +353,18 @@ def test_array_to_tuple_array_e(self) -> None: self.assertEqual(result.tolist(), [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9), (10, 11, 12, 13, 14), (15, 
16, 17, 18, 19)]) #--------------------------------------------------------------------------- - def test_array2d_tuple_iter_a(self) -> None: + def test_array_to_tuple_iter_a(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(4, 5) result = list(array_to_tuple_iter(a1)) self.assertEqual(len(result), 4) self.assertEqual(result, [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9), (10, 11, 12, 13, 14), (15, 16, 17, 18, 19)]) - def test_array2d_tuple_iter_b(self) -> None: + def test_array_to_tuple_iter_b(self) -> None: a1 = np.arange(20, dtype=np.int64).reshape(10, 2) result = list(array_to_tuple_iter(a1)) self.assertEqual(result, [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9), (10, 11), (12, 13), (14, 15), (16, 17), (18, 19)]) - def test_array2d_tuple_iter_c(self) -> None: + def test_array_to_tuple_iter_c(self) -> None: a1 = np.array([['aaa', 'bb'], ['c', 'dd'], ['ee', 'fffff']]) it = array_to_tuple_iter(a1) self.assertEqual(it.__length_hint__(), 3) @@ -377,20 +377,20 @@ def test_array2d_tuple_iter_c(self) -> None: with self.assertRaises(StopIteration): next(it) - def test_array2d_tuple_iter_d(self) -> None: + def test_array_to_tuple_iter_d(self) -> None: a1 = np.array([['aaa', 'bb'], ['c', 'dd'], ['ee', 'fffff']]) it = array_to_tuple_iter(a1) # __reversed__ not implemented with self.assertRaises(TypeError): reversed(it) - def test_array2d_tuple_iter_e(self) -> None: + def test_array_to_tuple_iter_e(self) -> None: a1 = np.array([[None, 'bb'], [None, 'dd'], [3, None]]) it = array_to_tuple_iter(a1) del a1 self.assertEqual(list(it), [(None, 'bb'), (None, 'dd'), (3, None)]) - def test_array2d_tuple_iter_f(self) -> None: + def test_array_to_tuple_iter_f(self) -> None: a1 = np.array([[None, 'bb'], [None, 'dd'], [3, None]]) it1 = array_to_tuple_iter(a1) del a1 @@ -398,7 +398,7 @@ def test_array2d_tuple_iter_f(self) -> None: self.assertEqual(list(it1), [(None, 'bb'), (None, 'dd'), (3, None)]) self.assertEqual(list(it2), []) # expected behavior - def test_array2d_tuple_iter_g(self) -> None: + 
def test_array_to_tuple_iter_g(self) -> None: a1 = np.array([[None, 'bb'], [None, 'dd'], [3, None]]) it1 = array_to_tuple_iter(a1) it2 = array_to_tuple_iter(a1) @@ -406,23 +406,33 @@ def test_array2d_tuple_iter_g(self) -> None: self.assertEqual(list(it1), [(None, 'bb'), (None, 'dd'), (3, None)]) self.assertEqual(list(it2), [(None, 'bb'), (None, 'dd'), (3, None)]) - def test_array2d_tuple_iter_1d_a(self) -> None: + def test_array_to_tuple_iter_1d_a(self) -> None: a1 = np.array(['bb', 'c', 'aaa']) result = list(array_to_tuple_iter(a1)) self.assertEqual(len(result), 3) self.assertEqual(result, [('bb',), ('c',), ('aaa',)]) - def test_array2d_tuple_iter_1d_b(self) -> None: + def test_array_to_tuple_iter_1d_b(self) -> None: a1 = np.array([20, -1, 8]) result = list(array_to_tuple_iter(a1)) self.assertEqual(len(result), 3) self.assertEqual(result, [(20,), (-1,), (8,)]) - def test_array2d_tuple_iter_1d_c(self) -> None: + def test_array_to_tuple_iter_1d_c(self) -> None: a1 = np.array([('a', 4), ('c', -1), ('d', 8)], dtype=object) - result = list(array_to_tuple_iter(a1)) - self.assertEqual(len(result), 3) - self.assertEqual(result, [('a', 4), ('c', -1), ('d', 8)]) + a2 = list(array_to_tuple_iter(a1)) + self.assertEqual(len(a2), 3) + self.assertEqual(a2, [('a', 4), ('c', -1), ('d', 8)]) + + def test_array_to_tuple_iter_1d_d(self) -> None: + a1 = np.array([None, None, None], dtype=object) + a1[0] = 3 + a1[1] = ('a', 30) + a1[2] = (None, True, 90000000) + + a2 = list(array_to_tuple_iter(a1)) + self.assertEqual(a2, [(3,), ('a', 30), (None, True, 90000000)]) + #---------------------------------------------------------------------------