diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ef59c86a21598..bf396b8124136 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -403,6 +403,7 @@ Other ^^^^^ - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) +- Bug in Cython :meth:`StringHashTable._unique` used ephemeral repr values when UnicodeEncodeError was raised (:issue:`45929`) - Bug in rendering ``inf`` values inside a a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`) - Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`) - diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 1cf5d734705af..79a0dfc6d723d 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -1128,6 +1128,7 @@ cdef class StringHashTable(HashTable): use_na_value = na_value is not None # assign pointers and pre-filter out missing (if ignore_na) + keep_rval_refs = [] vecs = malloc(n * sizeof(char *)) for i in range(n): val = values[i] @@ -1144,7 +1145,9 @@ cdef class StringHashTable(HashTable): try: v = get_c_string(val) except UnicodeEncodeError: - v = get_c_string(repr(val)) + rval = repr(val) + keep_rval_refs.append(rval) + v = get_c_string(rval) vecs[i] = v # compute