From 192a242135cbaf45fac94c145e1d37d23c4f5095 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 10:40:52 -0800 Subject: [PATCH 01/14] move non-tempita code out of tempita --- pandas/_libs/hashtable.pyx | 166 ++++++++++++++++++++++ pandas/_libs/hashtable_func_helper.pxi.in | 166 ---------------------- 2 files changed, 166 insertions(+), 166 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index ccac3d0b50d45..0e24d34a0730f 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -123,3 +123,169 @@ cdef class ObjectFactorizer(Factorizer): self.count, na_sentinel, na_value) self.count = len(self.uniques) return labels + +ctypedef fused htfunc_t: + numeric_object_t + complex128_t + complex64_t + + +cpdef value_count(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + if htfunc_t is object: + return value_count_object(values, dropna, mask=mask) + + elif htfunc_t is int8_t: + return value_count_int8(values, dropna, mask=mask) + elif htfunc_t is int16_t: + return value_count_int16(values, dropna, mask=mask) + elif htfunc_t is int32_t: + return value_count_int32(values, dropna, mask=mask) + elif htfunc_t is int64_t: + return value_count_int64(values, dropna, mask=mask) + + elif htfunc_t is uint8_t: + return value_count_uint8(values, dropna, mask=mask) + elif htfunc_t is uint16_t: + return value_count_uint16(values, dropna, mask=mask) + elif htfunc_t is uint32_t: + return value_count_uint32(values, dropna, mask=mask) + elif htfunc_t is uint64_t: + return value_count_uint64(values, dropna, mask=mask) + + elif htfunc_t is float64_t: + return value_count_float64(values, dropna, mask=mask) + elif htfunc_t is float32_t: + return value_count_float32(values, dropna, mask=mask) + + elif htfunc_t is complex128_t: + return value_count_complex128(values, dropna, mask=mask) + elif htfunc_t is complex64_t: + return value_count_complex64(values, dropna, mask=mask) + + else: + raise TypeError(values.dtype) + + +cpdef duplicated(ndarray[htfunc_t] values, + object keep="first", + const uint8_t[:] mask=None): + if htfunc_t is object: + return duplicated_object(values, keep, mask=mask) + + elif htfunc_t is int8_t: + return duplicated_int8(values, keep, mask=mask) + elif htfunc_t is int16_t: + return duplicated_int16(values, keep, mask=mask) + elif htfunc_t is int32_t: + return duplicated_int32(values, keep, mask=mask) + elif htfunc_t is int64_t: + return duplicated_int64(values, keep, mask=mask) + + elif htfunc_t is uint8_t: + return duplicated_uint8(values, keep, mask=mask) + elif htfunc_t is uint16_t: + return duplicated_uint16(values, keep, mask=mask) + elif htfunc_t is uint32_t: + return duplicated_uint32(values, keep, mask=mask) + elif htfunc_t is uint64_t: + return duplicated_uint64(values, keep, mask=mask) + + elif htfunc_t is float64_t: + return duplicated_float64(values, keep, mask=mask) + elif htfunc_t is float32_t: + return duplicated_float32(values, keep, mask=mask) + + elif htfunc_t is complex128_t: + return duplicated_complex128(values, keep, mask=mask) + elif htfunc_t is complex64_t: + return duplicated_complex64(values, keep, mask=mask) + + else: + raise TypeError(values.dtype) + + +cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): + if htfunc_t is object: + return ismember_object(arr, values) + + elif htfunc_t is int8_t: + return ismember_int8(arr, values) + elif htfunc_t is int16_t: + return ismember_int16(arr, values) + elif htfunc_t is int32_t: + return ismember_int32(arr, values) + elif htfunc_t is 
int64_t: + return ismember_int64(arr, values) + + elif htfunc_t is uint8_t: + return ismember_uint8(arr, values) + elif htfunc_t is uint16_t: + return ismember_uint16(arr, values) + elif htfunc_t is uint32_t: + return ismember_uint32(arr, values) + elif htfunc_t is uint64_t: + return ismember_uint64(arr, values) + + elif htfunc_t is float64_t: + return ismember_float64(arr, values) + elif htfunc_t is float32_t: + return ismember_float32(arr, values) + + elif htfunc_t is complex128_t: + return ismember_complex128(arr, values) + elif htfunc_t is complex64_t: + return ismember_complex64(arr, values) + + else: + raise TypeError(values.dtype) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + # TODO(cython3): use const htfunct_t[:] + + cdef: + ndarray[htfunc_t] keys + ndarray[htfunc_t] modes + ndarray[uint8_t] res_mask = None + + int64_t[::1] counts + int64_t count, _, max_count = -1 + Py_ssize_t nkeys, k, na_counter, j = 0 + + keys, counts, na_counter = value_count(values, dropna, mask=mask) + nkeys = len(keys) + + modes = np.empty(nkeys, dtype=values.dtype) + + if htfunc_t is not object: + with nogil: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + else: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + + if na_counter > 0: + res_mask = np.zeros(j+1, dtype=np.bool_) + res_mask[j] = True + return modes[:j + 1], res_mask diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 336af306d410f..c9d289335062e 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -281,172 +281,6 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): {{endfor}} - -ctypedef fused htfunc_t: - numeric_object_t - complex128_t - complex64_t - - -cpdef value_count(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): - if htfunc_t is object: - return value_count_object(values, dropna, mask=mask) - - elif htfunc_t is int8_t: - return value_count_int8(values, dropna, mask=mask) - elif htfunc_t is int16_t: - return value_count_int16(values, dropna, mask=mask) - elif htfunc_t is int32_t: - return value_count_int32(values, dropna, mask=mask) - elif htfunc_t is int64_t: - return value_count_int64(values, dropna, mask=mask) - - elif htfunc_t is uint8_t: - return value_count_uint8(values, dropna, mask=mask) - elif htfunc_t is uint16_t: - return value_count_uint16(values, dropna, mask=mask) - elif htfunc_t is uint32_t: - return value_count_uint32(values, dropna, mask=mask) - elif htfunc_t is uint64_t: - return value_count_uint64(values, dropna, mask=mask) - - elif htfunc_t is float64_t: - return value_count_float64(values, dropna, mask=mask) - elif htfunc_t is float32_t: - return value_count_float32(values, dropna, mask=mask) - - elif htfunc_t is complex128_t: - return value_count_complex128(values, dropna, mask=mask) - elif htfunc_t is complex64_t: - return value_count_complex64(values, dropna, mask=mask) - - else: - raise TypeError(values.dtype) - - -cpdef duplicated(ndarray[htfunc_t] values, object keep="first", const uint8_t[:] mask=None): - if htfunc_t is object: - return duplicated_object(values, keep, mask=mask) - - elif htfunc_t is int8_t: - 
return duplicated_int8(values, keep, mask=mask) - elif htfunc_t is int16_t: - return duplicated_int16(values, keep, mask=mask) - elif htfunc_t is int32_t: - return duplicated_int32(values, keep, mask=mask) - elif htfunc_t is int64_t: - return duplicated_int64(values, keep, mask=mask) - - elif htfunc_t is uint8_t: - return duplicated_uint8(values, keep, mask=mask) - elif htfunc_t is uint16_t: - return duplicated_uint16(values, keep, mask=mask) - elif htfunc_t is uint32_t: - return duplicated_uint32(values, keep, mask=mask) - elif htfunc_t is uint64_t: - return duplicated_uint64(values, keep, mask=mask) - - elif htfunc_t is float64_t: - return duplicated_float64(values, keep, mask=mask) - elif htfunc_t is float32_t: - return duplicated_float32(values, keep, mask=mask) - - elif htfunc_t is complex128_t: - return duplicated_complex128(values, keep, mask=mask) - elif htfunc_t is complex64_t: - return duplicated_complex64(values, keep, mask=mask) - - else: - raise TypeError(values.dtype) - - -cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): - if htfunc_t is object: - return ismember_object(arr, values) - - elif htfunc_t is int8_t: - return ismember_int8(arr, values) - elif htfunc_t is int16_t: - return ismember_int16(arr, values) - elif htfunc_t is int32_t: - return ismember_int32(arr, values) - elif htfunc_t is int64_t: - return ismember_int64(arr, values) - - elif htfunc_t is uint8_t: - return ismember_uint8(arr, values) - elif htfunc_t is uint16_t: - return ismember_uint16(arr, values) - elif htfunc_t is uint32_t: - return ismember_uint32(arr, values) - elif htfunc_t is uint64_t: - return ismember_uint64(arr, values) - - elif htfunc_t is float64_t: - return ismember_float64(arr, values) - elif htfunc_t is float32_t: - return ismember_float32(arr, values) - - elif htfunc_t is complex128_t: - return ismember_complex128(arr, values) - elif htfunc_t is complex64_t: - return ismember_complex64(arr, values) - - else: - raise TypeError(values.dtype) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): - # TODO(cython3): use const htfunct_t[:] - - cdef: - ndarray[htfunc_t] keys - ndarray[htfunc_t] modes - ndarray[uint8_t] res_mask = None - - int64_t[::1] counts - int64_t count, _, max_count = -1 - Py_ssize_t nkeys, k, na_counter, j = 0 - - keys, counts, na_counter = value_count(values, dropna, mask=mask) - nkeys = len(keys) - - modes = np.empty(nkeys, dtype=values.dtype) - - if htfunc_t is not object: - with nogil: - for k in range(nkeys): - count = counts[k] - if count == max_count: - j += 1 - elif count > max_count: - max_count = count - j = 0 - else: - continue - - modes[j] = keys[k] - else: - for k in range(nkeys): - count = counts[k] - if count == max_count: - j += 1 - elif count > max_count: - max_count = count - j = 0 - else: - continue - - modes[j] = keys[k] - - if na_counter > 0: - res_mask = np.zeros(j+1, dtype=np.bool_) - res_mask[j] = True - return modes[:j + 1], res_mask - - {{py: # name, dtype, ttype, c_type From 37e6410d5cf7c64b77bb013bf3fdad01382daf3d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 12:40:01 -0800 Subject: [PATCH 02/14] working compilation and passing tests --- pandas/_libs/hashtable.pyx | 2 - pandas/_libs/hashtable_cpp.pyx | 69 + pandas/_libs/hashtable_func_helper.pxi.in | 47 - .../pandas/vendored/klib/cpp/khash.hpp | 209 ++ pandas/_libs/meson.build | 10 + .../src/vendored/ujson/python/objToJSON_old.c | 2057 +++++++++++++++++ pandas/core/sorting.py | 2 +- 
 pandas/tests/libs/test_hashtable.py       |   11 +-
 8 files changed, 2353 insertions(+), 54 deletions(-)
 create mode 100644 pandas/_libs/hashtable_cpp.pyx
 create mode 100644 pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp
 create mode 100644 pandas/_libs/src/vendored/ujson/python/objToJSON_old.c

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 0e24d34a0730f..6dad3a43ba8b2 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -61,10 +61,8 @@ include "hashtable_func_helper.pxi"
 # map derived hash-map types onto basic hash-map types:
 if np.dtype(np.intp) == np.dtype(np.int64):
     IntpHashTable = Int64HashTable
-    unique_label_indices = _unique_label_indices_int64
 elif np.dtype(np.intp) == np.dtype(np.int32):
     IntpHashTable = Int32HashTable
-    unique_label_indices = _unique_label_indices_int32
 else:
     raise ValueError(np.dtype(np.intp))

diff --git a/pandas/_libs/hashtable_cpp.pyx b/pandas/_libs/hashtable_cpp.pyx
new file mode 100644
index 0000000000000..f25a3274de8b5
--- /dev/null
+++ b/pandas/_libs/hashtable_cpp.pyx
@@ -0,0 +1,69 @@
+import cython
+import numpy as np
+
+cimport numpy as cnp
+from libc.stdint cimport uint32_t
+from libc.string cimport memcpy
+from libcpp.vector cimport vector
+
+from pandas._libs.khash cimport kh_needed_n_buckets
+
+
+cdef extern from "<functional>" namespace "std" nogil:
+    cdef cppclass hash[T]:
+        hash()
+        size_t operator()(const T &)
+
+cdef extern from "pandas/vendored/klib/cpp/khash.hpp" namespace "klib" nogil:
+    cdef cppclass KHash[T, Hash, Eq=*, khint_t=*]:
+        T *keys
+        KHash()
+        # TODO: validate we don't need a destructor
+        # ~KHash()
+        bint exist(khint_t x)
+        T &at(khint_t x)
+        khint_t get(const T &)
+        # TODO: make this khint_t
+        # int resize(khint_t)
+        int resize(uint32_t)
+        khint_t put(const T &, int *)
+        # void del(khint_t x)
+
+
+# TODO: de-duplicate from hashtable.pyx
+cdef uint32_t SIZE_HINT_LIMIT = (1 << 20) + 7
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def unique_label_indices(const cnp.npy_intp[:] labels) -> cnp.ndarray:
+    """
+    Indices of the first occurrences of the unique labels
+    *excluding* -1. equivalent to:
+        np.unique(labels, return_index=True)[1]
+    """
+    cdef:
+        int ret = 0
+        Py_ssize_t i, n = len(labels)
+        KHash[cnp.npy_intp, hash[cnp.npy_intp]] *table = (
+            new KHash[cnp.npy_intp, hash[cnp.npy_intp]]()
+        )
+        cnp.ndarray[cnp.npy_intp, ndim=1] arr
+        vector[cnp.npy_intp] idx = vector[cnp.npy_intp]()
+
+    table.resize(min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
+
+    with nogil:
+        for i in range(n):
+            table.put(labels[i], &ret)
+            if ret != 0:
+                # TODO: pandas has a custom resize operation but we
+                # rely on C++ stdlib here - how different are they?
+                idx.push_back(i)
+
+    # TODO: must be a cleaner way to do this?
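+    # A possible alternative (untested sketch): expose the vector through
+    # a typed memoryview and let NumPy make the copy, e.g.
+    #   cdef cnp.npy_intp[::1] view = <cnp.npy_intp[:idx.size()]> idx.data()
+    #   arr = np.asarray(view).copy()
+    # idx.data() is only valid while the vector is alive, so the copy must
+    # happen before idx goes out of scope.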
+    arr = np.empty(idx.size(), dtype=np.intp)
+    memcpy(arr.data, idx.const_data(), idx.size() * sizeof(cnp.npy_intp))
+    arr = arr[np.asarray(labels)[arr].argsort()]
+
+    return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
index c9d289335062e..69e9cd0b90fa5 100644
--- a/pandas/_libs/hashtable_func_helper.pxi.in
+++ b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -280,50 +280,3 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values):
 # ----------------------------------------------------------------------

 {{endfor}}
-
-{{py:
-
-# name, dtype, ttype, c_type
-dtypes = [('Int64', 'int64', 'int64', 'int64_t'),
-          ('Int32', 'int32', 'int32', 'int32_t'), ]
-
-}}
-
-{{for name, dtype, ttype, c_type in dtypes}}
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
-    """
-    Indices of the first occurrences of the unique labels
-    *excluding* -1. equivalent to:
-        np.unique(labels, return_index=True)[1]
-    """
-    cdef:
-        int ret = 0
-        Py_ssize_t i, n = len(labels)
-        kh_{{ttype}}_t *table = kh_init_{{ttype}}()
-        {{name}}Vector idx = {{name}}Vector()
-        ndarray[{{c_type}}, ndim=1] arr
-        {{name}}VectorData *ud = idx.data
-
-    kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))
-
-    with nogil:
-        for i in range(n):
-            kh_put_{{ttype}}(table, labels[i], &ret)
-            if ret != 0:
-                if needs_resize(ud):
-                    with gil:
-                        idx.resize()
-                append_data_{{ttype}}(ud, i)
-
-    kh_destroy_{{ttype}}(table)
-
-    arr = idx.to_array()
-    arr = arr[np.asarray(labels)[arr].argsort()]
-
-    return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
-
-{{endfor}}
diff --git a/pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp b/pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp
new file mode 100644
index 0000000000000..465bdbba9bd99
--- /dev/null
+++ b/pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp
@@ -0,0 +1,209 @@
+#ifndef KHASH_HPP
+#define KHASH_HPP
+
+#include <cstdlib> // for malloc() etc
+#include <cstring> // for memset()
+#include <functional>
+#include <stdexcept>
+
+#include <cstdint> // for uint32_t
+
+namespace klib {
+
+#ifndef kroundup32 // FIXME: doesn't work for 64-bit integers
+#define kroundup32(x)                                                          \
+  (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, (x) |= (x) >> 8,  \
+   (x) |= (x) >> 16, ++(x))
+#endif
+
+#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2)
+#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1)
+#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3)
+#define __ac_set_isdel_false(flag, i)                                          \
+  (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
+#define __ac_set_isempty_false(flag, i)                                        \
+  (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
+#define __ac_set_isboth_false(flag, i)                                         \
+  (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
+#define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1))
+
+#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4)
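+// Flag layout: each bucket takes two bits of a uint32_t flag word, so one
+// word covers 16 buckets; bit value 2 marks "empty", bit value 1 marks
+// "deleted", and 0b00 means occupied.  For example, a table with m = 64
+// buckets needs __ac_fsize(64) = 64 >> 4 = 4 flag words, and the
+// memset(new_flags, 0xaa, ...) in resize() sets every pair to 0b10,
+// i.e. all buckets empty.
+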
+template <typename T, typename Hash, typename Eq = std::equal_to<T>,
+          typename khint_t = uint32_t>
+class KHash {
+  khint_t n_buckets, count, n_occupied, upper_bound;
+  uint32_t *flags;
+  T *keys;
+
+public:
+  KHash()
+      : n_buckets(0), count(0), n_occupied(0), upper_bound(0), flags(NULL),
+        keys(NULL){};
+  ~KHash() {
+    std::free(flags);
+    std::free(keys);
+  };
+  khint_t capacity(void) const { return n_buckets; };
+  khint_t size(void) const { return count; };
+  khint_t begin(void) const { return 0; };
+  khint_t end(void) const { return n_buckets; };
+
+  bool exist(khint_t x) const { return !__ac_iseither(flags, x); };
+  T &at(khint_t x) { return keys[x]; };
+
+  khint_t get(const T &key) const {
+    if (n_buckets) {
+      khint_t k, i, last, mask, step = 0;
+      mask = n_buckets - 1;
+      k = Hash()(key);
+      i = k & mask;
+      last = i;
+      while (!__ac_isempty(flags, i) &&
+             (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
+        i = (i + (++step)) & mask;
+        if (i == last)
+          return n_buckets;
+      }
+      return __ac_iseither(flags, i) ? n_buckets : i;
+    } else
+      return 0;
+  };
+
+  int resize(khint_t new_n_buckets) {
+    uint32_t *new_flags = 0;
+    khint_t j = 1;
+    {
+      kroundup32(new_n_buckets);
+      if (new_n_buckets < 4)
+        new_n_buckets = 4;
+      if (count >= (new_n_buckets >> 1) + (new_n_buckets >> 2))
+        j = 0; /* requested count is too small */
+      else {   /* hash table count to be changed (shrink or expand); rehash */
+        new_flags = (uint32_t *)std::malloc(__ac_fsize(new_n_buckets) *
+                                            sizeof(uint32_t));
+        if (!new_flags)
+          return -1;
+        ::memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(uint32_t));
+        if (n_buckets < new_n_buckets) { /* expand */
+          T *new_keys =
+              (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
+          if (!new_keys) {
+            std::free(new_flags);
+            return -1;
+          }
+          keys = new_keys;
+        } /* otherwise shrink */
+      }
+    }
+    if (j) { /* rehashing is needed */
+      for (j = 0; j != n_buckets; ++j) {
+        if (__ac_iseither(flags, j) == 0) {
+          T key = keys[j];
+          khint_t new_mask;
+          new_mask = new_n_buckets - 1;
+          __ac_set_isdel_true(flags, j);
+          while (1) { /* kick-out process; sort of like in Cuckoo hashing */
+            khint_t k, i, step = 0;
+            k = Hash()(key);
+            i = k & new_mask;
+            while (!__ac_isempty(new_flags, i))
+              i = (i + (++step)) & new_mask;
+            __ac_set_isempty_false(new_flags, i);
+            if (i < n_buckets && __ac_iseither(flags, i) ==
+                                     0) { /* kick out the existing element */
+              {
+                T tmp = keys[i];
+                keys[i] = key;
+                key = tmp;
+              }
+              __ac_set_isdel_true(
+                  flags, i); /* mark it as deleted in the old hash table */
+            } else { /* write the element and jump out of the loop */
+              keys[i] = key;
+              break;
+            }
+          }
+        }
+      }
+      if (n_buckets > new_n_buckets) /* shrink the hash table */
+        keys = (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
+      std::free(flags); /* free the working space */
+      flags = new_flags;
+      n_buckets = new_n_buckets;
+      n_occupied = count;
+      upper_bound = (n_buckets >> 1) + (n_buckets >> 2);
+    }
+    return 0;
+  };
+
+  khint_t put(const T &key, int *ret) {
+    khint_t x;
+    if (n_occupied >= upper_bound) { /* update the hash table */
+      if (n_buckets > (count << 1)) {
+        if (resize(n_buckets - 1) < 0) { /* clear "deleted" elements */
+          *ret = -1;
+          return n_buckets;
+        }
+      } else if (resize(n_buckets + 1) < 0) { /* expand the hash table */
+        *ret = -1;
+        return n_buckets;
+      }
+    } /* TODO: to implement automatically shrinking; resize() already support
+         shrinking */
+    {
+      khint_t k, i, site, last, mask = n_buckets - 1, step = 0;
+      x = site = n_buckets;
+      k = Hash()(key);
+      i = k & mask;
+      if (__ac_isempty(flags, i))
+        x = i; /* for speed up */
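+      // Probing note: the else-branch below advances with
+      // i = (i + (++step)) & mask, i.e. cumulative offsets 1, 3, 6, 10, ...
+      // (triangular numbers) from the home bucket.  For a power-of-two
+      // n_buckets this sequence visits every bucket, so the i == last test
+      // is the sole "table is full" guard.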
else { + last = i; + while (!__ac_isempty(flags, i) && + (__ac_isdel(flags, i) || !Eq()(keys[i], key))) { + if (__ac_isdel(flags, i)) + site = i; + i = (i + (++step)) & mask; + if (i == last) { + x = site; + break; + } + } + if (x == n_buckets) { + if (__ac_isempty(flags, i) && site != n_buckets) + x = site; + else + x = i; + } + } + } + if (__ac_isempty(flags, x)) { /* not present at all */ + keys[x] = key; + __ac_set_isboth_false(flags, x); + ++count; + ++n_occupied; + *ret = 1; + } else if (__ac_isdel(flags, x)) { /* deleted */ + keys[x] = key; + __ac_set_isboth_false(flags, x); + ++count; + *ret = 2; + } else + *ret = 0; /* Don't touch keys[x] if present and not deleted */ + return x; + }; + + void del(khint_t x) { + if (x != n_buckets && !__ac_iseither(flags, x)) { + __ac_set_isdel_true(flags, x); + --count; + } + }; +}; + +} // end of namespace klib + +#endif diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index c27386743c6e9..1a4b2553526af 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -122,6 +122,16 @@ foreach ext_name, ext_dict : libs_sources ) endforeach +# hashtable_cpp is an exception because it requires cpp compiler +py.extension_module( + 'hashtable_cpp', + ['hashtable_cpp.pyx'], + include_directories: [inc_np, inc_pd], + subdir: 'pandas/_libs', + override_options : ['cython_language=cpp'], + install: true +) + # Basically just __init__.py and the .pyi files sources_to_install = [ '__init__.py', diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c b/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c new file mode 100644 index 0000000000000..9f1c1d3f857d1 --- /dev/null +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c @@ -0,0 +1,2057 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. 
+
+Numeric decoder derived from TCL library
+https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
+* Copyright (c) 1988-1993 The Regents of the University of California.
+* Copyright (c) 1994 Sun Microsystems, Inc.
+*/
+
+// Licence at LICENSES/ULTRAJSON_LICENSE
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <math.h>
+
+#define NO_IMPORT_ARRAY
+#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
+#include "datetime.h"
+#include "pandas/datetime/pd_datetime.h"
+#include "pandas/vendored/ujson/lib/ultrajson.h"
+#include <numpy/arrayobject.h>
+#include <numpy/arrayscalars.h>
+#include <numpy/ndarraytypes.h>
+#include <numpy/npy_math.h>
+
+npy_int64 get_nat(void) { return NPY_MIN_INT64; }
+
+typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
+                                  size_t *_outLen);
+
+int object_is_decimal_type(PyObject *obj);
+int object_is_dataframe_type(PyObject *obj);
+int object_is_series_type(PyObject *obj);
+int object_is_index_type(PyObject *obj);
+int object_is_nat_type(PyObject *obj);
+int object_is_na_type(PyObject *obj);
+
+typedef struct __NpyArrContext {
+  PyObject *array;
+  char *dataptr;
+  int curdim;    // current dimension in array's order
+  int stridedim; // dimension we are striding over
+  int inc;       // stride dimension increment (+/- 1)
+  npy_intp dim;
+  npy_intp stride;
+  npy_intp ndim;
+  npy_intp index[NPY_MAXDIMS];
+  int type_num;
+  PyArray_GetItemFunc *getitem;
+
+  char **rowLabels;
+  char **columnLabels;
+} NpyArrContext;
+
+typedef struct __PdBlockContext {
+  int colIdx;
+  int ncols;
+  int transpose;
+
+  NpyArrContext **npyCtxts; // NpyArrContext for each column
+} PdBlockContext;
+
+typedef struct __TypeContext {
+  JSPFN_ITERBEGIN iterBegin;
+  JSPFN_ITEREND iterEnd;
+  JSPFN_ITERNEXT iterNext;
+  JSPFN_ITERGETNAME iterGetName;
+  JSPFN_ITERGETVALUE iterGetValue;
+  PFN_PyTypeToUTF8 PyTypeToUTF8;
+  PyObject *newObj;
+  PyObject *dictObj;
+  Py_ssize_t index;
+  Py_ssize_t size;
+  PyObject *itemValue;
+  PyObject *itemName;
+  PyObject *attrList;
+  PyObject *iterator;
+
+  double doubleValue;
+  JSINT64 longValue;
+
+  char *cStr;
+  NpyArrContext *npyarr;
+  PdBlockContext *pdblock;
+  int transpose;
+  char **rowLabels;
+  char **columnLabels;
+  npy_intp rowLabelsLen;
+  npy_intp columnLabelsLen;
+} TypeContext;
+
+typedef struct __PyObjectEncoder {
+  JSONObjectEncoder enc;
+
+  // pass through the NpyArrContext when encoding multi-dimensional arrays
+  NpyArrContext *npyCtxtPassthru;
+
+  // pass through the PdBlockContext when encoding blocks
+  PdBlockContext *blkCtxtPassthru;
+
+  // pass-through to encode numpy data directly
+  int npyType;
+  void *npyValue;
+
+  int datetimeIso;
+  NPY_DATETIMEUNIT datetimeUnit;
+  NPY_DATETIMEUNIT valueUnit;
+
+  // output format style for pandas data types
+  int outputFormat;
+  int originalOutputFormat;
+
+  PyObject *defaultHandler;
+} PyObjectEncoder;
+
+#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv))
+
+enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES };
+
+static int PdBlock_iterNext(JSOBJ, JSONTypeContext *);
+
+static TypeContext *createTypeContext(void) {
+  TypeContext *pc = PyObject_Malloc(sizeof(TypeContext));
+  if (!pc) {
+    PyErr_NoMemory();
+    return NULL;
+  }
+  pc->newObj = NULL;
+  pc->dictObj = NULL;
+  pc->itemValue = NULL;
+  pc->itemName = NULL;
+  pc->attrList = NULL;
+  pc->index = 0;
+  pc->size = 0;
+  pc->longValue = 0;
+  pc->doubleValue = 0.0;
+  pc->cStr = NULL;
+  pc->npyarr = NULL;
+  pc->pdblock = NULL;
+  pc->rowLabels = NULL;
+  pc->columnLabels = NULL;
+  pc->transpose = 0;
+  pc->rowLabelsLen = 0;
+  pc->columnLabelsLen = 0;
+
+  return pc;
+}
+
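+/* Rough Python equivalent of get_values() below, as an annotation only
+ * (names are illustrative, not part of the original file); for a Series
+ * or Index obj:
+ *
+ *     if getattr(obj, "tz", None) is not None:
+ *         return obj.__array__()    # keep tz info via object array
+ *     v = obj.values
+ *     if hasattr(v, "__array__"):
+ *         v = v.__array__()         # unwrap Categorical/Sparse
+ *     return v                      # must be an ndarray, else ValueError
+ */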
+static PyObject *get_values(PyObject *obj) {
+  PyObject *values = NULL;
+
+  if (object_is_index_type(obj) || object_is_series_type(obj)) {
+    // The special cases to worry about are dt64tz and category[dt64tz].
+    // In both cases we want the UTC-localized datetime64 ndarray,
+    // without going through an object array of Timestamps.
+    if (PyObject_HasAttrString(obj, "tz")) {
+      PyObject *tz = PyObject_GetAttrString(obj, "tz");
+      if (tz != Py_None) {
+        // Go through object array if we have dt64tz, since tz info will
+        // be lost if values is used directly.
+        Py_DECREF(tz);
+        values = PyObject_CallMethod(obj, "__array__", NULL);
+        return values;
+      }
+      Py_DECREF(tz);
+    }
+    values = PyObject_GetAttrString(obj, "values");
+    if (values == NULL) {
+      // Clear so we can subsequently try another method
+      PyErr_Clear();
+    } else if (PyObject_HasAttrString(values, "__array__")) {
+      // We may have gotten a Categorical or Sparse array so call np.array
+      PyObject *array_values = PyObject_CallMethod(values, "__array__", NULL);
+      Py_DECREF(values);
+      values = array_values;
+    } else if (!PyArray_CheckExact(values)) {
+      // Didn't get a numpy array, so keep trying
+      Py_DECREF(values);
+      values = NULL;
+    }
+  }
+
+  if (values == NULL) {
+    PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj));
+    PyObject *repr;
+    if (PyObject_HasAttrString(obj, "dtype")) {
+      PyObject *dtype = PyObject_GetAttrString(obj, "dtype");
+      repr = PyObject_Repr(dtype);
+      Py_DECREF(dtype);
+    } else {
+      repr = PyUnicode_FromString("<unknown dtype>");
+    }
+
+    PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet",
+                 repr, typeRepr);
+    Py_DECREF(repr);
+    Py_DECREF(typeRepr);
+
+    return NULL;
+  }
+
+  return values;
+}
+
+static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) {
+  PyObject *tmp = PyObject_GetAttrString(obj, attr);
+  if (tmp == 0) {
+    return 0;
+  }
+  PyObject *ret = PyObject_GetAttrString(tmp, subAttr);
+  Py_DECREF(tmp);
+
+  return ret;
+}
+
+static Py_ssize_t get_attr_length(PyObject *obj, char *attr) {
+  PyObject *tmp = PyObject_GetAttrString(obj, attr);
+  if (tmp == 0) {
+    return 0;
+  }
+  Py_ssize_t ret = PyObject_Length(tmp);
+  Py_DECREF(tmp);
+
+  if (ret == -1) {
+    return 0;
+  }
+
+  return ret;
+}
+
+static npy_int64 get_long_attr(PyObject *o, const char *attr) {
+  // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT
+
+  PyObject *value = PyObject_GetAttrString(o, attr);
+  const npy_int64 long_val =
+      (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value));
+
+  Py_DECREF(value);
+
+  if (object_is_nat_type(o)) {
+    // i.e.
o is NaT, long_val will be NPY_MIN_INT64 + return long_val; + } + + // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit + PyObject *reso = PyObject_GetAttrString(o, "_creso"); + if (!PyLong_Check(reso)) { + // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139 + Py_DECREF(reso); + return -1; + } + + long cReso = PyLong_AsLong(reso); + Py_DECREF(reso); + if (cReso == -1 && PyErr_Occurred()) { + return -1; + } + + if (cReso == NPY_FR_us) { + return long_val * 1000L; + } else if (cReso == NPY_FR_ms) { + return long_val * 1000000L; + } else if (cReso == NPY_FR_s) { + return long_val * 1000000000L; + } + + return long_val; +} + +static npy_float64 total_seconds(PyObject *td) { + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + const npy_float64 double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; +} + +static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + PyObject *obj = (PyObject *)_obj; + *_outLen = PyBytes_GET_SIZE(obj); + return PyBytes_AS_STRING(obj); +} + +static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) { + char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); + if (encoded == NULL) { + /* Something went wrong. + Set errorMsg(to tell encoder to stop), + and let Python exception propagate. */ + JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; + enc->errorMsg = "Encoding failed."; + } + return encoded; +} + +/* JSON callback. returns a char* and mutates the pointer to *len */ +static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit; + GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len); + return GET_TC(tc)->cStr; +} + +/* JSON callback. 
returns a char* and mutates the pointer to *len */ +static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len); + return GET_TC(tc)->cStr; +} + +/* JSON callback */ +static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, + size_t *len) { + if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) { + PyErr_SetString(PyExc_TypeError, "Expected date or datetime object"); + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return PyDateTimeToIso(obj, base, len); +} + +static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { + PyObject *obj = (PyObject *)_obj; + PyObject *str = PyObject_CallMethod(obj, "isoformat", NULL); + if (str == NULL) { + *outLen = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert time"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + PyObject *tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *outLen = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= + +static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { + if (GET_TC(tc)->npyarr && + GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } +} + +static int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), + JSONTypeContext *Py_UNUSED(tc)) { + return 0; +} + +static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyArrayObject *obj = + (PyArrayObject *)(GET_TC(tc)->newObj ? 
GET_TC(tc)->newObj : _obj); + + NpyArrContext *npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject *)obj; + npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + npyarr->type_num = PyArray_DESCR(obj)->type_num; + + if (GET_TC(tc)->transpose) { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } else { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; +} + +static void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (npyarr) { + NpyArr_freeItemValue(obj, tc); + PyObject_Free(npyarr); + } +} + +static void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + // finished this dimension, reset the data pointer + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + NpyArr_freeItemValue(obj, tc); +} + +static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + + NpyArr_freeItemValue(obj, tc); + + if (PyArray_ISDATETIME(npyarr->array)) { + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); + // Also write the resolution (unit) of the ndarray + PyArray_Descr *dtype = PyArray_DESCR(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->valueUnit = + get_datetime_metadata_from_dtype(dtype).base; + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } else { + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + } + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->curdim >= npyarr->ndim || + npyarr->index[npyarr->stridedim] >= npyarr->dim) { + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +static 
JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + const npy_intp idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + const npy_intp idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + + return cStr; +} + +//============================================================================= +// Pandas block iteration functions +// +// Serialises a DataFrame column by column to avoid unnecessary data copies and +// more representative serialisation when dealing with mixed dtypes. +// +// Uses a dedicated NpyArrContext for each column. +//============================================================================= + +static void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + blkCtxt->colIdx++; + } else { + blkCtxt->colIdx = 0; + } + + NpyArr_freeItemValue(obj, tc); +} + +static int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + blkCtxt->colIdx++; + return NpyArr_iterNextItem(obj, tc); +} + +static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + char *cStr; + + if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { + const npy_intp idx = blkCtxt->colIdx - 1; + cStr = npyarr->columnLabels[idx]; + } else { + const npy_intp idx = + GET_TC(tc)->iterNext != PdBlock_iterNext + ? 
npyarr->index[npyarr->stridedim - npyarr->inc] - 1 + : npyarr->index[npyarr->stridedim]; + + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +static char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), + JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + const npy_intp idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + const npy_intp idx = blkCtxt->colIdx; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +static int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (blkCtxt->transpose) { + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + } else { + const NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + } + + ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; + GET_TC(tc)->itemValue = obj; + + return 1; +} + +static void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt->transpose) { + // if transposed we exhaust each column before moving to the next + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + } +} + +static void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; + + PdBlockContext *blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); + if (!blkCtxt) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + GET_TC(tc)->pdblock = blkCtxt; + + blkCtxt->colIdx = 0; + blkCtxt->transpose = GET_TC(tc)->transpose; + blkCtxt->ncols = get_attr_length(obj, "columns"); + + if (blkCtxt->ncols == 0) { + blkCtxt->npyCtxts = NULL; + + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + blkCtxt->npyCtxts = PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); + if (!blkCtxt->npyCtxts) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + PyObject *arrays = get_sub_attr(obj, "_mgr", "column_arrays"); + if (!arrays) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + for (Py_ssize_t i = 0; i < PyObject_Length(arrays); i++) { + PyObject *array = PyList_GET_ITEM(arrays, i); + if (!array) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + // ensure we have a numpy array (i.e. 
np.asarray) + PyObject *values = PyObject_CallMethod(array, "__array__", NULL); + if ((!values) || (!PyArray_CheckExact(values))) { + // Didn't get a numpy array + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto ARR_RET; + } + + GET_TC(tc)->newObj = values; + + // init a dedicated context for this column + NpyArr_iterBegin(obj, tc); + + GET_TC(tc)->itemValue = NULL; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; + + blkCtxt->npyCtxts[i] = GET_TC(tc)->npyarr; + GET_TC(tc)->newObj = NULL; + } + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; + goto ARR_RET; + +ARR_RET: + Py_DECREF(arrays); +} + +static void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt) { + for (int i = 0; i < blkCtxt->ncols; i++) { + npyarr = blkCtxt->npyCtxts[i]; + if (npyarr) { + if (npyarr->array) { + Py_DECREF(npyarr->array); + npyarr->array = NULL; + } + + GET_TC(tc)->npyarr = npyarr; + NpyArr_iterEnd(obj, tc); + + blkCtxt->npyCtxts[i] = NULL; + } + } + + if (blkCtxt->npyCtxts) { + PyObject_Free(blkCtxt->npyCtxts); + } + PyObject_Free(blkCtxt); + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +static void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); + GET_TC(tc)->itemValue = NULL; +} + +static int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { + + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + PyObject *item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index++; + return 1; +} + +static void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +static JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Set iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +static void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +static int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + PyObject *item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +static void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +static JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Set_iterGetName(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + 
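+
+/* Note: each iteration group in this file implements the same ultrajson
+ * callback contract; the encoder drives it roughly as
+ *
+ *     iterBegin(obj, tc);
+ *     while (iterNext(obj, tc)) {
+ *         value = iterGetValue(obj, tc);     // encoded recursively
+ *         name = iterGetName(obj, tc, &len); // NULL for array-like objects
+ *     }
+ *     iterEnd(obj, tc);
+ */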
+//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +static void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); +} + +static void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + Py_DECREF((PyObject *)GET_TC(tc)->attrList); +} + +static int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { + PyObject *attrName = + PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + PyObject *attr = PyUnicode_AsUTF8String(attrName); + const char *attrStr = PyBytes_AS_STRING(attr); + + if (attrStr[0] == '_') { + Py_DECREF(attr); + continue; + } + + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { + PyErr_Clear(); + Py_DECREF(attr); + continue; + } + + if (PyCallable_Check(itemValue)) { + Py_DECREF(itemValue); + Py_DECREF(attr); + continue; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + + itemName = attr; + break; + } + + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + return 1; +} + +static JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). 
No refcounting +//============================================================================= +static void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); +} + +static int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); + GET_TC(tc)->index++; + return 1; +} + +static void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) { +} + +static JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *List_iterGetName(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (!GET_TC(tc)->cStr) { + return 0; + } + + const Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +static void Index_iterEnd(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) {} + +static JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (!GET_TC(tc)->cStr) { + return 0; + } + + const Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +static void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = 
enc->originalOutputFormat; +} + +static JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas DataFrame iteration functions +//============================================================================= +static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series & index + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } +} + +static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (!GET_TC(tc)->cStr) { + return 0; + } + + const Py_ssize_t index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + Py_INCREF(obj); + GET_TC(tc)->itemValue = obj; + } else { + return 0; + } + + GET_TC(tc)->index++; + return 1; +} + +static void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; +} + +static JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// Dict iteration functions +// itemName might converted to string (Python_Str). Do refCounting +// itemValue is borrowed from object (which is dict). 
No refCounting +//============================================================================= +static void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; +} + +static int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, + &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { + return 0; + } + + if (PyUnicode_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { + GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); + PyObject *itemNameTmp = GET_TC(tc)->itemName; + GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); + Py_DECREF(itemNameTmp); + } else { + Py_INCREF(GET_TC(tc)->itemName); + } + return 1; +} + +static void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + Py_DECREF(GET_TC(tc)->dictObj); +} + +static JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +static char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +static void NpyArr_freeLabels(char **labels, npy_intp len) { + if (labels) { + for (npy_intp i = 0; i < len; i++) { + PyObject_Free(labels[i]); + } + PyObject_Free(labels); + } +} + +/* + * Function: NpyArr_encodeLabels + * ----------------------------- + * + * Builds an array of "encoded" labels. + * + * labels: PyArrayObject pointer for labels to be "encoded" + * num : number of labels + * + * "encode" is quoted above because we aren't really doing encoding + * For historical reasons this function would actually encode the entire + * array into a separate buffer with a separate call to JSON_Encode + * and would leave it to complex pointer manipulation from there to + * unpack values as needed. To make things simpler and more idiomatic + * this has instead just stringified any input save for datetime values, + * which may need to be represented in various formats. + */ +static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, + npy_intp num) { + // NOTE this function steals a reference to labels. 
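+  // ("steals" = the caller must not DECREF labels afterwards; every exit
+  // path below, success or error, releases labels exactly once)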
+ PyObject *item = NULL; + const NPY_DATETIMEUNIT base = enc->datetimeUnit; + + if (!labels) { + return 0; + } + + if (PyArray_SIZE(labels) < num) { + PyErr_SetString(PyExc_ValueError, + "Label array sizes do not match corresponding data shape"); + Py_DECREF(labels); + return 0; + } + + char **ret = PyObject_Malloc(sizeof(char *) * num); + if (!ret) { + PyErr_NoMemory(); + Py_DECREF(labels); + return 0; + } + + for (npy_intp i = 0; i < num; i++) { + ret[i] = NULL; + } + + const npy_intp stride = PyArray_STRIDE(labels, 0); + char *dataptr = PyArray_DATA(labels); + const int type_num = PyArray_TYPE(labels); + PyArray_Descr *dtype = PyArray_DESCR(labels); + + for (npy_intp i = 0; i < num; i++) { + item = PyArray_GETITEM(labels, dataptr); + if (!item) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + int is_datetimelike = 0; + npy_int64 i8date; + NPY_DATETIMEUNIT dateUnit = NPY_FR_ns; + if (PyTypeNum_ISDATETIME(type_num)) { + is_datetimelike = 1; + i8date = *(npy_int64 *)dataptr; + dateUnit = get_datetime_metadata_from_dtype(dtype).base; + } else if (PyDate_Check(item) || PyDelta_Check(item)) { + is_datetimelike = 1; + if (PyObject_HasAttrString(item, "_value")) { + // pd.Timestamp object or pd.NaT + // see test_date_index_and_values for case with non-nano + i8date = get_long_attr(item, "_value"); + } else { + if (PyDelta_Check(item)) { + i8date = total_seconds(item) * 1000000000LL; // nanoseconds per second + } else { + // datetime.* objects don't follow above rules + i8date = PyDateTimeToEpoch(item, NPY_FR_ns); + } + } + } + + size_t len; + char *cLabel; + if (is_datetimelike) { + if (i8date == get_nat()) { + len = 4; + cLabel = PyObject_Malloc(len + 1); + strncpy(cLabel, "null", len + 1); + } else { + if (enc->datetimeIso) { + if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { + // TODO(username): non-nano timedelta support? + cLabel = int64ToIsoDuration(i8date, &len); + } else { + if (type_num == NPY_DATETIME) { + cLabel = int64ToIso(i8date, dateUnit, base, &len); + } else { + cLabel = PyDateTimeToIso(item, base, &len); + } + } + if (cLabel == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + } else { + int size_of_cLabel = 21; // 21 chars for int 64 + cLabel = PyObject_Malloc(size_of_cLabel); + if (scaleNanosecToUnit(&i8date, base) == -1) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, i8date); + len = strlen(cLabel); + } + } + } else { // Fallback to string representation + // Replace item with the string to keep it alive. 
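+      // (Py_SETREF decrefs the old item and stores the new reference in
+      // one step, so the UTF-8 buffer borrowed from item stays valid
+      // until the explicit Py_DECREF(item) below)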
+ Py_SETREF(item, PyObject_Str(item)); + if (item == NULL) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(item); + len = strlen(cLabel); + } + + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); + Py_DECREF(item); + + if (is_datetimelike) { + PyObject_Free(cLabel); + } + + if (PyErr_Occurred()) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (!ret[i]) { + PyErr_NoMemory(); + ret = 0; + break; + } + + dataptr += stride; + } + + Py_DECREF(labels); + return ret; +} + +static void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { + PyObject *tmpObj = NULL; + tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); + if (!PyErr_Occurred()) { + if (tmpObj == NULL) { + PyErr_SetString(PyExc_TypeError, "Failed to execute default handler"); + } else { + encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); + } + } + Py_XDECREF(tmpObj); + return; +} + +static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { + tc->prv = NULL; + + if (!_obj) { + tc->type = JT_INVALID; + return; + } + + PyObject *obj = (PyObject *)_obj; + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + + if (PyBool_Check(obj)) { + tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; + return; + } else if (obj == Py_None) { + tc->type = JT_NULL; + return; + } + + TypeContext *pc = createTypeContext(); + if (!pc) { + tc->type = JT_INVALID; + return; + } + tc->prv = pc; + + if (PyTypeNum_ISDATETIME(enc->npyType)) { + int64_t longVal = *(npy_int64 *)enc->npyValue; + if (longVal == get_nat()) { + tc->type = JT_NULL; + } else { + if (enc->datetimeIso) { + if (enc->npyType == NPY_TIMEDELTA) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + } else { + pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; + } + // Currently no way to pass longVal to iso function, so use + // state management + pc->longValue = longVal; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + if (scaleNanosecToUnit(&longVal, base) == -1) { + goto INVALID; + } + pc->longValue = longVal; + tc->type = JT_LONG; + } + } + + // TODO(username): this prevents infinite loop with + // mixed-type DataFrames; + // refactor + enc->npyCtxtPassthru = NULL; + enc->npyType = -1; + return; + } + + if (PyIter_Check(obj) || (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { + goto ISITERABLE; + } + + if (PyLong_Check(obj)) { + tc->type = JT_LONG; + int overflow = 0; + pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); + int err; + err = (pc->longValue == -1) && PyErr_Occurred(); + + if (overflow) { + tc->type = JT_BIGNUM; + } else if (err) { + goto INVALID; + } + + return; + } else if (PyFloat_Check(obj)) { + const double val = PyFloat_AS_DOUBLE(obj); + if (npy_isnan(val) || npy_isinf(val)) { + tc->type = JT_NULL; + } else { + pc->doubleValue = val; + tc->type = JT_DOUBLE; + } + return; + } else if (PyBytes_Check(obj)) { + pc->PyTypeToUTF8 = PyBytesToUTF8; + tc->type = JT_UTF8; + return; + } else if (PyUnicode_Check(obj)) { + pc->PyTypeToUTF8 = PyUnicodeToUTF8; + tc->type = JT_UTF8; + return; + } else if (object_is_decimal_type(obj)) { + pc->doubleValue = PyFloat_AsDouble(obj); + tc->type = JT_DOUBLE; + return; + } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { + if (object_is_nat_type(obj)) { + tc->type = JT_NULL; + return; + } + + if (enc->datetimeIso) { + pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; + tc->type = JT_UTF8; + } else { + 
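+      // Non-ISO path: encode the datetime as an integer epoch value in
+      // the encoder's date_unit (milliseconds since the epoch by
+      // default).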
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + pc->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyTime_Check(obj)) { + pc->PyTypeToUTF8 = PyTimeToJSON; + tc->type = JT_UTF8; + return; + } else if (PyArray_IsScalar(obj, Datetime)) { + npy_int64 longVal; + if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { + tc->type = JT_NULL; + return; + } + PyArray_Descr *dtype = PyArray_DescrFromScalar(obj); + if (!PyTypeNum_ISDATETIME(dtype->type_num)) { + PyErr_Format(PyExc_ValueError, "Could not get resolution of datetime"); + return; + } + + PyArray_Descr *outcode = PyArray_DescrFromType(NPY_INT64); + PyArray_CastScalarToCtype(obj, &longVal, outcode); + Py_DECREF(outcode); + + if (enc->datetimeIso) { + GET_TC(tc)->longValue = longVal; + pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; + enc->valueUnit = get_datetime_metadata_from_dtype(dtype).base; + tc->type = JT_UTF8; + } else { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + pc->longValue = PyDateTimeToEpoch(obj, base); + tc->type = JT_LONG; + } + return; + } else if (PyDelta_Check(obj)) { + npy_int64 value = + PyObject_HasAttrString(obj, "_value") ? get_long_attr(obj, "_value") + : // pd.Timedelta object or pd.NaT + total_seconds(obj) * 1000000000LL; // nanoseconds per sec + + if (value == get_nat()) { + tc->type = JT_NULL; + return; + } else if (enc->datetimeIso) { + pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; + tc->type = JT_UTF8; + } else { + const int unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + if (scaleNanosecToUnit(&value, unit) != 0) { + // TODO(username): Add some kind of error handling here + } + + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + tc->type = JT_LONG; + } + pc->longValue = value; + return; + } else if (PyArray_IsScalar(obj, Integer)) { + tc->type = JT_LONG; + PyArray_CastScalarToCtype(obj, &(pc->longValue), + PyArray_DescrFromType(NPY_INT64)); + + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto INVALID; + } + + return; + } else if (PyArray_IsScalar(obj, Bool)) { + PyArray_CastScalarToCtype(obj, &(pc->longValue), + PyArray_DescrFromType(NPY_BOOL)); + tc->type = (pc->longValue) ? 
JT_TRUE : JT_FALSE; + return; + } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { + PyArray_CastScalarToCtype(obj, &(pc->doubleValue), + PyArray_DescrFromType(NPY_DOUBLE)); + tc->type = JT_DOUBLE; + return; + } else if (PyArray_CheckScalar(obj)) { + PyErr_Format(PyExc_TypeError, + "%R (numpy-scalar) is not JSON serializable at the moment", + obj); + goto INVALID; + } else if (object_is_na_type(obj)) { + tc->type = JT_NULL; + return; + } + +ISITERABLE: + + if (object_is_index_type(obj)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Index_iterBegin; + pc->iterEnd = Index_iterEnd; + pc->iterNext = Index_iterNext; + pc->iterGetValue = Index_iterGetValue; + pc->iterGetName = Index_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (pc->newObj) { + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + } else { + goto INVALID; + } + + return; + } else if (object_is_series_type(obj)) { + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = Series_iterBegin; + pc->iterEnd = Series_iterEnd; + pc->iterNext = Series_iterNext; + pc->iterGetValue = Series_iterGetValue; + pc->iterGetName = Series_iterGetName; + return; + } + + pc->newObj = get_values(obj); + if (!pc->newObj) { + goto INVALID; + } + + if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + PyObject *tmpObj = PyObject_GetAttrString(obj, "index"); + if (!tmpObj) { + goto INVALID; + } + PyObject *values = get_values(tmpObj); + Py_DECREF(tmpObj); + if (!values) { + goto INVALID; + } + pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + if (!pc->columnLabels) { + goto INVALID; + } + } else { + tc->type = JT_ARRAY; + } + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (PyArray_Check(obj)) { + if (enc->npyCtxtPassthru) { + pc->npyarr = enc->npyCtxtPassthru; + tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); + + pc->iterBegin = NpyArrPassThru_iterBegin; + pc->iterNext = NpyArr_iterNext; + pc->iterEnd = NpyArrPassThru_iterEnd; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + + enc->npyCtxtPassthru = NULL; + return; + } + + tc->type = JT_ARRAY; + pc->iterBegin = NpyArr_iterBegin; + pc->iterEnd = NpyArr_iterEnd; + pc->iterNext = NpyArr_iterNext; + pc->iterGetValue = NpyArr_iterGetValue; + pc->iterGetName = NpyArr_iterGetName; + return; + } else if (object_is_dataframe_type(obj)) { + if (enc->blkCtxtPassthru) { + pc->pdblock = enc->blkCtxtPassthru; + tc->type = + (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); + + pc->iterBegin = PdBlockPassThru_iterBegin; + pc->iterEnd = PdBlockPassThru_iterEnd; + pc->iterNext = PdBlock_iterNextItem; + pc->iterGetName = PdBlock_iterGetName; + pc->iterGetValue = NpyArr_iterGetValue; + + enc->blkCtxtPassthru = NULL; + return; + } + + if (enc->outputFormat == SPLIT) { + tc->type = JT_OBJECT; + pc->iterBegin = DataFrame_iterBegin; + pc->iterEnd = DataFrame_iterEnd; + pc->iterNext = DataFrame_iterNext; + pc->iterGetValue = DataFrame_iterGetValue; + pc->iterGetName = DataFrame_iterGetName; + return; + } + + pc->iterBegin = PdBlock_iterBegin; + pc->iterEnd = PdBlock_iterEnd; + pc->iterNext = PdBlock_iterNext; + pc->iterGetName = PdBlock_iterGetName; + pc->iterGetValue = NpyArr_iterGetValue; + + if (enc->outputFormat == VALUES) { + tc->type = JT_ARRAY; + } else if (enc->outputFormat == RECORDS) { + tc->type = JT_ARRAY; + PyObject *tmpObj = PyObject_GetAttrString(obj, "columns"); + if (!tmpObj) { + goto INVALID; + } + PyObject *values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + goto INVALID; + } + } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { + tc->type = JT_OBJECT; + PyObject *tmpObj = + (enc->outputFormat == INDEX ? PyObject_GetAttrString(obj, "index") + : PyObject_GetAttrString(obj, "columns")); + if (!tmpObj) { + goto INVALID; + } + PyObject *values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + goto INVALID; + } + pc->rowLabelsLen = PyObject_Size(tmpObj); + pc->rowLabels = + NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->rowLabelsLen); + Py_DECREF(tmpObj); + tmpObj = + (enc->outputFormat == INDEX ? 
PyObject_GetAttrString(obj, "columns") + : PyObject_GetAttrString(obj, "index")); + if (!tmpObj) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + values = get_values(tmpObj); + if (!values) { + Py_DECREF(tmpObj); + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + pc->columnLabelsLen = PyObject_Size(tmpObj); + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->columnLabelsLen); + Py_DECREF(tmpObj); + if (!pc->columnLabels) { + NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); + pc->rowLabels = NULL; + goto INVALID; + } + + if (enc->outputFormat == COLUMNS) { + pc->transpose = 1; + } + } else { + goto INVALID; + } + return; + } else if (PyDict_Check(obj)) { + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = obj; + Py_INCREF(obj); + + return; + } else if (PyList_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = List_iterBegin; + pc->iterEnd = List_iterEnd; + pc->iterNext = List_iterNext; + pc->iterGetValue = List_iterGetValue; + pc->iterGetName = List_iterGetName; + return; + } else if (PyTuple_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Tuple_iterBegin; + pc->iterEnd = Tuple_iterEnd; + pc->iterNext = Tuple_iterNext; + pc->iterGetValue = Tuple_iterGetValue; + pc->iterGetName = Tuple_iterGetName; + return; + } else if (PyAnySet_Check(obj)) { + tc->type = JT_ARRAY; + pc->iterBegin = Set_iterBegin; + pc->iterEnd = Set_iterEnd; + pc->iterNext = Set_iterNext; + pc->iterGetValue = Set_iterGetValue; + pc->iterGetName = Set_iterGetName; + return; + } + + PyObject *toDictFunc = PyObject_GetAttrString(obj, "toDict"); + + if (toDictFunc) { + PyObject *tuple = PyTuple_New(0); + PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); + Py_DECREF(tuple); + Py_DECREF(toDictFunc); + + if (toDictResult == NULL) { + PyErr_Clear(); + tc->type = JT_NULL; + return; + } + + if (!PyDict_Check(toDictResult)) { + Py_DECREF(toDictResult); + tc->type = JT_NULL; + return; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dict_iterBegin; + pc->iterEnd = Dict_iterEnd; + pc->iterNext = Dict_iterNext; + pc->iterGetValue = Dict_iterGetValue; + pc->iterGetName = Dict_iterGetName; + pc->dictObj = toDictResult; + return; + } + + PyErr_Clear(); + + if (enc->defaultHandler) { + Object_invokeDefaultHandler(obj, enc); + goto INVALID; + } + + tc->type = JT_OBJECT; + pc->iterBegin = Dir_iterBegin; + pc->iterEnd = Dir_iterEnd; + pc->iterNext = Dir_iterNext; + pc->iterGetValue = Dir_iterGetValue; + pc->iterGetName = Dir_iterGetName; + return; + +INVALID: + tc->type = JT_INVALID; + PyObject_Free(tc->prv); + tc->prv = NULL; + return; +} + +static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (tc->prv) { + Py_XDECREF(GET_TC(tc)->newObj); + GET_TC(tc)->newObj = NULL; + NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); + GET_TC(tc)->rowLabels = NULL; + NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen); + GET_TC(tc)->columnLabels = NULL; + PyObject_Free(GET_TC(tc)->cStr); + GET_TC(tc)->cStr = NULL; + PyObject_Free(tc->prv); + tc->prv = NULL; + } +} + +static const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); +} + +static JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), 
JSONTypeContext *tc) { + return GET_TC(tc)->longValue; +} + +static double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->doubleValue; +} + +static const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, + size_t *_outLen) { + PyObject *repr = PyObject_Str(obj); + const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); + char *bytes = PyObject_Malloc(*_outLen + 1); + memcpy(bytes, str, *_outLen + 1); + GET_TC(tc)->cStr = bytes; + + Py_DECREF(repr); + + return GET_TC(tc)->cStr; +} + +static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } + +static void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterBegin(obj, tc); +} + +static int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterNext(obj, tc); +} + +static void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->iterEnd(obj, tc); +} + +static JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { + return GET_TC(tc)->iterGetValue(obj, tc); +} + +static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, + size_t *outLen) { + return GET_TC(tc)->iterGetName(obj, tc, outLen); +} + +PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, + PyObject *kwargs) { + PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + return NULL; + } + + PandasDateTime_IMPORT; + if (PandasDateTimeAPI == NULL) { + return NULL; + } + + static char *kwlist[] = {"obj", + "ensure_ascii", + "double_precision", + "encode_html_chars", + "orient", + "date_unit", + "iso_dates", + "default_handler", + "indent", + NULL}; + + PyObject *oinput = NULL; + PyObject *oensureAscii = NULL; + int idoublePrecision = 10; // default double precision setting + PyObject *oencodeHTMLChars = NULL; + char *sOrient = NULL; + char *sdateFormat = NULL; + PyObject *oisoDates = 0; + PyObject *odefHandler = 0; + int indent = 0; + + PyObjectEncoder pyEncoder = {{ + Object_beginTypeContext, + Object_endTypeContext, + Object_getStringValue, + Object_getLongValue, + NULL, // getIntValue is unused + Object_getDoubleValue, + Object_getBigNumStringValue, + Object_iterBegin, + Object_iterNext, + Object_iterEnd, + Object_iterGetValue, + Object_iterGetName, + Object_releaseObject, + PyObject_Malloc, + PyObject_Realloc, + PyObject_Free, + -1, // recursionMax + idoublePrecision, + 1, // forceAscii + 0, // encodeHTMLChars + indent, // indent + }}; + JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; + + pyEncoder.npyCtxtPassthru = NULL; + pyEncoder.blkCtxtPassthru = NULL; + pyEncoder.npyType = -1; + pyEncoder.npyValue = NULL; + pyEncoder.datetimeIso = 0; + pyEncoder.datetimeUnit = NPY_FR_ms; + pyEncoder.outputFormat = COLUMNS; + pyEncoder.defaultHandler = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, &oinput, + &oensureAscii, &idoublePrecision, + &oencodeHTMLChars, &sOrient, &sdateFormat, + &oisoDates, &odefHandler, &indent)) { + return NULL; + } + + if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { + encoder->forceASCII = 0; + } + + if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { + encoder->encodeHTMLChars = 1; + } + + if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { + PyErr_Format( + PyExc_ValueError, + "Invalid value '%d' for option 'double_precision', max is '%u'", + idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); + return NULL; + } + encoder->doublePrecision = idoublePrecision; + + if (sOrient != NULL) { + if (strcmp(sOrient, "records") == 0) { 
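+      // Orient strings map onto the PANDAS_FORMAT enum; "columns" is
+      // the preset default, so only unrecognized values fall through to
+      // the ValueError below.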
+ pyEncoder.outputFormat = RECORDS; + } else if (strcmp(sOrient, "index") == 0) { + pyEncoder.outputFormat = INDEX; + } else if (strcmp(sOrient, "split") == 0) { + pyEncoder.outputFormat = SPLIT; + } else if (strcmp(sOrient, "values") == 0) { + pyEncoder.outputFormat = VALUES; + } else if (strcmp(sOrient, "columns") != 0) { + PyErr_Format(PyExc_ValueError, "Invalid value '%s' for option 'orient'", + sOrient); + return NULL; + } + } + + if (sdateFormat != NULL) { + if (strcmp(sdateFormat, "s") == 0) { + pyEncoder.datetimeUnit = NPY_FR_s; + } else if (strcmp(sdateFormat, "ms") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ms; + } else if (strcmp(sdateFormat, "us") == 0) { + pyEncoder.datetimeUnit = NPY_FR_us; + } else if (strcmp(sdateFormat, "ns") == 0) { + pyEncoder.datetimeUnit = NPY_FR_ns; + } else { + PyErr_Format(PyExc_ValueError, + "Invalid value '%s' for option 'date_unit'", sdateFormat); + return NULL; + } + } + + if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { + pyEncoder.datetimeIso = 1; + } + + if (odefHandler != NULL && odefHandler != Py_None) { + if (!PyCallable_Check(odefHandler)) { + PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); + return NULL; + } + pyEncoder.defaultHandler = odefHandler; + } + + encoder->indent = indent; + + pyEncoder.originalOutputFormat = pyEncoder.outputFormat; + + char buffer[65536]; + char *ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); + if (PyErr_Occurred()) { + return NULL; + } + + if (encoder->errorMsg) { + if (ret != buffer) { + encoder->free(ret); + } + PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); + return NULL; + } + + PyObject *newobj = PyUnicode_FromString(ret); + + if (ret != buffer) { + encoder->free(ret); + } + + return newobj; +} diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index a431842218b3b..5ce6845991da4 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -16,7 +16,7 @@ hashtable, lib, ) -from pandas._libs.hashtable import unique_label_indices +from pandas._libs.hashtable_cpp import unique_label_indices from pandas.core.dtypes.common import ( ensure_int64, diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index e54764f9ac4a6..928deed89f057 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas._libs import hashtable as ht +from pandas._libs import ( + hashtable as ht, + hashtable_cpp as ht_cpp, +) import pandas as pd import pandas._testing as tm @@ -665,7 +668,7 @@ def test_modes_with_nans(): def test_unique_label_indices_intp(writable): keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp) keys.flags.writeable = writable - result = ht.unique_label_indices(keys) + result = ht_cpp.unique_label_indices(keys) expected = np.array([0, 1, 5], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) @@ -673,13 +676,13 @@ def test_unique_label_indices_intp(writable): def test_unique_label_indices(): a = np.random.default_rng(2).integers(1, 1 << 10, 1 << 15).astype(np.intp) - left = ht.unique_label_indices(a) + left = ht_cpp.unique_label_indices(a) right = np.unique(a, return_index=True)[1] tm.assert_numpy_array_equal(left, right, check_dtype=False) a[np.random.default_rng(2).choice(len(a), 10)] = -1 - left = ht.unique_label_indices(a) + left = ht_cpp.unique_label_indices(a) right = np.unique(a, return_index=True)[1][1:] tm.assert_numpy_array_equal(left, right, check_dtype=False) From 
650e33666d1dcbf16e53e46ead488e8e6eab480d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 12:44:10 -0800 Subject: [PATCH 03/14] Revert "move non-tempita code out of tempita" This reverts commit 192a242135cbaf45fac94c145e1d37d23c4f5095. --- pandas/_libs/hashtable.pyx | 166 ---------------------- pandas/_libs/hashtable_func_helper.pxi.in | 165 +++++++++++++++++++++ 2 files changed, 165 insertions(+), 166 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 6dad3a43ba8b2..8732d3e075537 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -121,169 +121,3 @@ cdef class ObjectFactorizer(Factorizer): self.count, na_sentinel, na_value) self.count = len(self.uniques) return labels - -ctypedef fused htfunc_t: - numeric_object_t - complex128_t - complex64_t - - -cpdef value_count(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): - if htfunc_t is object: - return value_count_object(values, dropna, mask=mask) - - elif htfunc_t is int8_t: - return value_count_int8(values, dropna, mask=mask) - elif htfunc_t is int16_t: - return value_count_int16(values, dropna, mask=mask) - elif htfunc_t is int32_t: - return value_count_int32(values, dropna, mask=mask) - elif htfunc_t is int64_t: - return value_count_int64(values, dropna, mask=mask) - - elif htfunc_t is uint8_t: - return value_count_uint8(values, dropna, mask=mask) - elif htfunc_t is uint16_t: - return value_count_uint16(values, dropna, mask=mask) - elif htfunc_t is uint32_t: - return value_count_uint32(values, dropna, mask=mask) - elif htfunc_t is uint64_t: - return value_count_uint64(values, dropna, mask=mask) - - elif htfunc_t is float64_t: - return value_count_float64(values, dropna, mask=mask) - elif htfunc_t is float32_t: - return value_count_float32(values, dropna, mask=mask) - - elif htfunc_t is complex128_t: - return value_count_complex128(values, dropna, mask=mask) - elif htfunc_t is complex64_t: - return value_count_complex64(values, dropna, mask=mask) - - else: - raise TypeError(values.dtype) - - -cpdef duplicated(ndarray[htfunc_t] values, - object keep="first", - const uint8_t[:] mask=None): - if htfunc_t is object: - return duplicated_object(values, keep, mask=mask) - - elif htfunc_t is int8_t: - return duplicated_int8(values, keep, mask=mask) - elif htfunc_t is int16_t: - return duplicated_int16(values, keep, mask=mask) - elif htfunc_t is int32_t: - return duplicated_int32(values, keep, mask=mask) - elif htfunc_t is int64_t: - return duplicated_int64(values, keep, mask=mask) - - elif htfunc_t is uint8_t: - return duplicated_uint8(values, keep, mask=mask) - elif htfunc_t is uint16_t: - return duplicated_uint16(values, keep, mask=mask) - elif htfunc_t is uint32_t: - return duplicated_uint32(values, keep, mask=mask) - elif htfunc_t is uint64_t: - return duplicated_uint64(values, keep, mask=mask) - - elif htfunc_t is float64_t: - return duplicated_float64(values, keep, mask=mask) - elif htfunc_t is float32_t: - return duplicated_float32(values, keep, mask=mask) - - elif htfunc_t is complex128_t: - return duplicated_complex128(values, keep, mask=mask) - elif htfunc_t is complex64_t: - return duplicated_complex64(values, keep, mask=mask) - - else: - raise TypeError(values.dtype) - - -cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): - if htfunc_t is object: - return ismember_object(arr, values) - - elif htfunc_t is int8_t: - return ismember_int8(arr, values) - elif htfunc_t is int16_t: - return ismember_int16(arr, values) - elif htfunc_t is 
int32_t: - return ismember_int32(arr, values) - elif htfunc_t is int64_t: - return ismember_int64(arr, values) - - elif htfunc_t is uint8_t: - return ismember_uint8(arr, values) - elif htfunc_t is uint16_t: - return ismember_uint16(arr, values) - elif htfunc_t is uint32_t: - return ismember_uint32(arr, values) - elif htfunc_t is uint64_t: - return ismember_uint64(arr, values) - - elif htfunc_t is float64_t: - return ismember_float64(arr, values) - elif htfunc_t is float32_t: - return ismember_float32(arr, values) - - elif htfunc_t is complex128_t: - return ismember_complex128(arr, values) - elif htfunc_t is complex64_t: - return ismember_complex64(arr, values) - - else: - raise TypeError(values.dtype) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): - # TODO(cython3): use const htfunct_t[:] - - cdef: - ndarray[htfunc_t] keys - ndarray[htfunc_t] modes - ndarray[uint8_t] res_mask = None - - int64_t[::1] counts - int64_t count, _, max_count = -1 - Py_ssize_t nkeys, k, na_counter, j = 0 - - keys, counts, na_counter = value_count(values, dropna, mask=mask) - nkeys = len(keys) - - modes = np.empty(nkeys, dtype=values.dtype) - - if htfunc_t is not object: - with nogil: - for k in range(nkeys): - count = counts[k] - if count == max_count: - j += 1 - elif count > max_count: - max_count = count - j = 0 - else: - continue - - modes[j] = keys[k] - else: - for k in range(nkeys): - count = counts[k] - if count == max_count: - j += 1 - elif count > max_count: - max_count = count - j = 0 - else: - continue - - modes[j] = keys[k] - - if na_counter > 0: - res_mask = np.zeros(j+1, dtype=np.bool_) - res_mask[j] = True - return modes[:j + 1], res_mask diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index 69e9cd0b90fa5..963dedbe7ec3b 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -280,3 +280,168 @@ cdef ismember_{{dtype}}(const {{dtype}}_t[:] arr, const {{dtype}}_t[:] values): # ---------------------------------------------------------------------- {{endfor}} + + +ctypedef fused htfunc_t: + numeric_object_t + complex128_t + complex64_t + + +cpdef value_count(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + if htfunc_t is object: + return value_count_object(values, dropna, mask=mask) + + elif htfunc_t is int8_t: + return value_count_int8(values, dropna, mask=mask) + elif htfunc_t is int16_t: + return value_count_int16(values, dropna, mask=mask) + elif htfunc_t is int32_t: + return value_count_int32(values, dropna, mask=mask) + elif htfunc_t is int64_t: + return value_count_int64(values, dropna, mask=mask) + + elif htfunc_t is uint8_t: + return value_count_uint8(values, dropna, mask=mask) + elif htfunc_t is uint16_t: + return value_count_uint16(values, dropna, mask=mask) + elif htfunc_t is uint32_t: + return value_count_uint32(values, dropna, mask=mask) + elif htfunc_t is uint64_t: + return value_count_uint64(values, dropna, mask=mask) + + elif htfunc_t is float64_t: + return value_count_float64(values, dropna, mask=mask) + elif htfunc_t is float32_t: + return value_count_float32(values, dropna, mask=mask) + + elif htfunc_t is complex128_t: + return value_count_complex128(values, dropna, mask=mask) + elif htfunc_t is complex64_t: + return value_count_complex64(values, dropna, mask=mask) + + else: + raise TypeError(values.dtype) + + +cpdef duplicated(ndarray[htfunc_t] values, object keep="first", 
const uint8_t[:] mask=None): + if htfunc_t is object: + return duplicated_object(values, keep, mask=mask) + + elif htfunc_t is int8_t: + return duplicated_int8(values, keep, mask=mask) + elif htfunc_t is int16_t: + return duplicated_int16(values, keep, mask=mask) + elif htfunc_t is int32_t: + return duplicated_int32(values, keep, mask=mask) + elif htfunc_t is int64_t: + return duplicated_int64(values, keep, mask=mask) + + elif htfunc_t is uint8_t: + return duplicated_uint8(values, keep, mask=mask) + elif htfunc_t is uint16_t: + return duplicated_uint16(values, keep, mask=mask) + elif htfunc_t is uint32_t: + return duplicated_uint32(values, keep, mask=mask) + elif htfunc_t is uint64_t: + return duplicated_uint64(values, keep, mask=mask) + + elif htfunc_t is float64_t: + return duplicated_float64(values, keep, mask=mask) + elif htfunc_t is float32_t: + return duplicated_float32(values, keep, mask=mask) + + elif htfunc_t is complex128_t: + return duplicated_complex128(values, keep, mask=mask) + elif htfunc_t is complex64_t: + return duplicated_complex64(values, keep, mask=mask) + + else: + raise TypeError(values.dtype) + + +cpdef ismember(ndarray[htfunc_t] arr, ndarray[htfunc_t] values): + if htfunc_t is object: + return ismember_object(arr, values) + + elif htfunc_t is int8_t: + return ismember_int8(arr, values) + elif htfunc_t is int16_t: + return ismember_int16(arr, values) + elif htfunc_t is int32_t: + return ismember_int32(arr, values) + elif htfunc_t is int64_t: + return ismember_int64(arr, values) + + elif htfunc_t is uint8_t: + return ismember_uint8(arr, values) + elif htfunc_t is uint16_t: + return ismember_uint16(arr, values) + elif htfunc_t is uint32_t: + return ismember_uint32(arr, values) + elif htfunc_t is uint64_t: + return ismember_uint64(arr, values) + + elif htfunc_t is float64_t: + return ismember_float64(arr, values) + elif htfunc_t is float32_t: + return ismember_float32(arr, values) + + elif htfunc_t is complex128_t: + return ismember_complex128(arr, values) + elif htfunc_t is complex64_t: + return ismember_complex64(arr, values) + + else: + raise TypeError(values.dtype) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): + # TODO(cython3): use const htfunct_t[:] + + cdef: + ndarray[htfunc_t] keys + ndarray[htfunc_t] modes + ndarray[uint8_t] res_mask = None + + int64_t[::1] counts + int64_t count, _, max_count = -1 + Py_ssize_t nkeys, k, na_counter, j = 0 + + keys, counts, na_counter = value_count(values, dropna, mask=mask) + nkeys = len(keys) + + modes = np.empty(nkeys, dtype=values.dtype) + + if htfunc_t is not object: + with nogil: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + else: + for k in range(nkeys): + count = counts[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = keys[k] + + if na_counter > 0: + res_mask = np.zeros(j+1, dtype=np.bool_) + res_mask[j] = True + return modes[:j + 1], res_mask From 6756eb41f4b19cfa2134c0c5209862bea9c9dd1d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 12:45:09 -0800 Subject: [PATCH 04/14] remove errant file --- .../src/vendored/ujson/python/objToJSON_old.c | 2057 ----------------- 1 file changed, 2057 deletions(-) delete mode 100644 pandas/_libs/src/vendored/ujson/python/objToJSON_old.c diff --git 
a/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c b/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c deleted file mode 100644 index 9f1c1d3f857d1..0000000000000 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON_old.c +++ /dev/null @@ -1,2057 +0,0 @@ -/* -Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. -* Neither the name of the ESN Social Software AB nor the -names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) -https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights -reserved. - -Numeric decoder derived from TCL library -https://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms -* Copyright (c) 1988-1993 The Regents of the University of California. -* Copyright (c) 1994 Sun Microsystems, Inc. 
-*/ - -// Licence at LICENSES/ULTRAJSON_LICENSE - -#define PY_SSIZE_T_CLEAN -#include -#include - -#define NO_IMPORT_ARRAY -#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY -#include "datetime.h" -#include "pandas/datetime/pd_datetime.h" -#include "pandas/vendored/ujson/lib/ultrajson.h" -#include -#include -#include -#include - -npy_int64 get_nat(void) { return NPY_MIN_INT64; } - -typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, - size_t *_outLen); - -int object_is_decimal_type(PyObject *obj); -int object_is_dataframe_type(PyObject *obj); -int object_is_series_type(PyObject *obj); -int object_is_index_type(PyObject *obj); -int object_is_nat_type(PyObject *obj); -int object_is_na_type(PyObject *obj); - -typedef struct __NpyArrContext { - PyObject *array; - char *dataptr; - int curdim; // current dimension in array's order - int stridedim; // dimension we are striding over - int inc; // stride dimension increment (+/- 1) - npy_intp dim; - npy_intp stride; - npy_intp ndim; - npy_intp index[NPY_MAXDIMS]; - int type_num; - PyArray_GetItemFunc *getitem; - - char **rowLabels; - char **columnLabels; -} NpyArrContext; - -typedef struct __PdBlockContext { - int colIdx; - int ncols; - int transpose; - - NpyArrContext **npyCtxts; // NpyArrContext for each column -} PdBlockContext; - -typedef struct __TypeContext { - JSPFN_ITERBEGIN iterBegin; - JSPFN_ITEREND iterEnd; - JSPFN_ITERNEXT iterNext; - JSPFN_ITERGETNAME iterGetName; - JSPFN_ITERGETVALUE iterGetValue; - PFN_PyTypeToUTF8 PyTypeToUTF8; - PyObject *newObj; - PyObject *dictObj; - Py_ssize_t index; - Py_ssize_t size; - PyObject *itemValue; - PyObject *itemName; - PyObject *attrList; - PyObject *iterator; - - double doubleValue; - JSINT64 longValue; - - char *cStr; - NpyArrContext *npyarr; - PdBlockContext *pdblock; - int transpose; - char **rowLabels; - char **columnLabels; - npy_intp rowLabelsLen; - npy_intp columnLabelsLen; -} TypeContext; - -typedef struct __PyObjectEncoder { - JSONObjectEncoder enc; - - // pass through the NpyArrContext when encoding multi-dimensional arrays - NpyArrContext *npyCtxtPassthru; - - // pass through the PdBlockContext when encoding blocks - PdBlockContext *blkCtxtPassthru; - - // pass-through to encode numpy data directly - int npyType; - void *npyValue; - - int datetimeIso; - NPY_DATETIMEUNIT datetimeUnit; - NPY_DATETIMEUNIT valueUnit; - - // output format style for pandas data types - int outputFormat; - int originalOutputFormat; - - PyObject *defaultHandler; -} PyObjectEncoder; - -#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) - -enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; - -static int PdBlock_iterNext(JSOBJ, JSONTypeContext *); - -static TypeContext *createTypeContext(void) { - TypeContext *pc = PyObject_Malloc(sizeof(TypeContext)); - if (!pc) { - PyErr_NoMemory(); - return NULL; - } - pc->newObj = NULL; - pc->dictObj = NULL; - pc->itemValue = NULL; - pc->itemName = NULL; - pc->attrList = NULL; - pc->index = 0; - pc->size = 0; - pc->longValue = 0; - pc->doubleValue = 0.0; - pc->cStr = NULL; - pc->npyarr = NULL; - pc->pdblock = NULL; - pc->rowLabels = NULL; - pc->columnLabels = NULL; - pc->transpose = 0; - pc->rowLabelsLen = 0; - pc->columnLabelsLen = 0; - - return pc; -} - -static PyObject *get_values(PyObject *obj) { - PyObject *values = NULL; - - if (object_is_index_type(obj) || object_is_series_type(obj)) { - // The special cases to worry about are dt64tz and category[dt64tz]. 
- // In both cases we want the UTC-localized datetime64 ndarray, - // without going through and object array of Timestamps. - if (PyObject_HasAttrString(obj, "tz")) { - PyObject *tz = PyObject_GetAttrString(obj, "tz"); - if (tz != Py_None) { - // Go through object array if we have dt64tz, since tz info will - // be lost if values is used directly. - Py_DECREF(tz); - values = PyObject_CallMethod(obj, "__array__", NULL); - return values; - } - Py_DECREF(tz); - } - values = PyObject_GetAttrString(obj, "values"); - if (values == NULL) { - // Clear so we can subsequently try another method - PyErr_Clear(); - } else if (PyObject_HasAttrString(values, "__array__")) { - // We may have gotten a Categorical or Sparse array so call np.array - PyObject *array_values = PyObject_CallMethod(values, "__array__", NULL); - Py_DECREF(values); - values = array_values; - } else if (!PyArray_CheckExact(values)) { - // Didn't get a numpy array, so keep trying - Py_DECREF(values); - values = NULL; - } - } - - if (values == NULL) { - PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); - PyObject *repr; - if (PyObject_HasAttrString(obj, "dtype")) { - PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); - repr = PyObject_Repr(dtype); - Py_DECREF(dtype); - } else { - repr = PyUnicode_FromString(""); - } - - PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", - repr, typeRepr); - Py_DECREF(repr); - Py_DECREF(typeRepr); - - return NULL; - } - - return values; -} - -static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { - PyObject *tmp = PyObject_GetAttrString(obj, attr); - if (tmp == 0) { - return 0; - } - PyObject *ret = PyObject_GetAttrString(tmp, subAttr); - Py_DECREF(tmp); - - return ret; -} - -static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { - PyObject *tmp = PyObject_GetAttrString(obj, attr); - if (tmp == 0) { - return 0; - } - Py_ssize_t ret = PyObject_Length(tmp); - Py_DECREF(tmp); - - if (ret == -1) { - return 0; - } - - return ret; -} - -static npy_int64 get_long_attr(PyObject *o, const char *attr) { - // NB we are implicitly assuming that o is a Timedelta or Timestamp, or NaT - - PyObject *value = PyObject_GetAttrString(o, attr); - const npy_int64 long_val = - (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value)); - - Py_DECREF(value); - - if (object_is_nat_type(o)) { - // i.e. 
o is NaT, long_val will be NPY_MIN_INT64 - return long_val; - } - - // ensure we are in nanoseconds, similar to Timestamp._as_creso or _as_unit - PyObject *reso = PyObject_GetAttrString(o, "_creso"); - if (!PyLong_Check(reso)) { - // https://github.com/pandas-dev/pandas/pull/49034#discussion_r1023165139 - Py_DECREF(reso); - return -1; - } - - long cReso = PyLong_AsLong(reso); - Py_DECREF(reso); - if (cReso == -1 && PyErr_Occurred()) { - return -1; - } - - if (cReso == NPY_FR_us) { - return long_val * 1000L; - } else if (cReso == NPY_FR_ms) { - return long_val * 1000000L; - } else if (cReso == NPY_FR_s) { - return long_val * 1000000000L; - } - - return long_val; -} - -static npy_float64 total_seconds(PyObject *td) { - PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); - const npy_float64 double_val = PyFloat_AS_DOUBLE(value); - Py_DECREF(value); - return double_val; -} - -static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), - size_t *_outLen) { - PyObject *obj = (PyObject *)_obj; - *_outLen = PyBytes_GET_SIZE(obj); - return PyBytes_AS_STRING(obj); -} - -static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, size_t *_outLen) { - char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); - if (encoded == NULL) { - /* Something went wrong. - Set errorMsg(to tell encoder to stop), - and let Python exception propagate. */ - JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; - enc->errorMsg = "Encoding failed."; - } - return encoded; -} - -/* JSON callback. returns a char* and mutates the pointer to *len */ -static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), - JSONTypeContext *tc, size_t *len) { - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit; - GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len); - return GET_TC(tc)->cStr; -} - -/* JSON callback. 
returns a char* and mutates the pointer to *len */ -static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused), - JSONTypeContext *tc, size_t *len) { - GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len); - return GET_TC(tc)->cStr; -} - -/* JSON callback */ -static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, - size_t *len) { - if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) { - PyErr_SetString(PyExc_TypeError, "Expected date or datetime object"); - ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - return NULL; - } - - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - return PyDateTimeToIso(obj, base, len); -} - -static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { - PyObject *obj = (PyObject *)_obj; - PyObject *str = PyObject_CallMethod(obj, "isoformat", NULL); - if (str == NULL) { - *outLen = 0; - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "Failed to convert time"); - } - ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - return NULL; - } - if (PyUnicode_Check(str)) { - PyObject *tmp = str; - str = PyUnicode_AsUTF8String(str); - Py_DECREF(tmp); - } - - GET_TC(tc)->newObj = str; - - *outLen = PyBytes_GET_SIZE(str); - char *outValue = PyBytes_AS_STRING(str); - return outValue; -} - -//============================================================================= -// Numpy array iteration functions -//============================================================================= - -static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { - if (GET_TC(tc)->npyarr && - GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { - Py_XDECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } -} - -static int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), - JSONTypeContext *Py_UNUSED(tc)) { - return 0; -} - -static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { - PyArrayObject *obj = - (PyArrayObject *)(GET_TC(tc)->newObj ? 
GET_TC(tc)->newObj : _obj); - - NpyArrContext *npyarr = PyObject_Malloc(sizeof(NpyArrContext)); - GET_TC(tc)->npyarr = npyarr; - - if (!npyarr) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - npyarr->array = (PyObject *)obj; - npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; - npyarr->dataptr = PyArray_DATA(obj); - npyarr->ndim = PyArray_NDIM(obj) - 1; - npyarr->curdim = 0; - npyarr->type_num = PyArray_DESCR(obj)->type_num; - - if (GET_TC(tc)->transpose) { - npyarr->dim = PyArray_DIM(obj, npyarr->ndim); - npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); - npyarr->stridedim = npyarr->ndim; - npyarr->index[npyarr->ndim] = 0; - npyarr->inc = -1; - } else { - npyarr->dim = PyArray_DIM(obj, 0); - npyarr->stride = PyArray_STRIDE(obj, 0); - npyarr->stridedim = 0; - npyarr->index[0] = 0; - npyarr->inc = 1; - } - - npyarr->columnLabels = GET_TC(tc)->columnLabels; - npyarr->rowLabels = GET_TC(tc)->rowLabels; -} - -static void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (npyarr) { - NpyArr_freeItemValue(obj, tc); - PyObject_Free(npyarr); - } -} - -static void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc)) {} - -static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - // finished this dimension, reset the data pointer - npyarr->curdim--; - npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; - npyarr->stridedim -= npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->dataptr += npyarr->stride; - - NpyArr_freeItemValue(obj, tc); -} - -static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (PyErr_Occurred()) { - return 0; - } - - if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { - return 0; - } - - NpyArr_freeItemValue(obj, tc); - - if (PyArray_ISDATETIME(npyarr->array)) { - GET_TC(tc)->itemValue = obj; - Py_INCREF(obj); - ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array); - // Also write the resolution (unit) of the ndarray - PyArray_Descr *dtype = PyArray_DESCR(npyarr->array); - ((PyObjectEncoder *)tc->encoder)->valueUnit = - get_datetime_metadata_from_dtype(dtype).base; - ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; - } else { - GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); - } - - npyarr->dataptr += npyarr->stride; - npyarr->index[npyarr->stridedim]++; - return 1; -} - -static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - - if (PyErr_Occurred()) { - return 0; - } - - if (npyarr->curdim >= npyarr->ndim || - npyarr->index[npyarr->stridedim] >= npyarr->dim) { - // innermost dimension, start retrieving item values - GET_TC(tc)->iterNext = NpyArr_iterNextItem; - return NpyArr_iterNextItem(_obj, tc); - } - - // dig a dimension deeper - npyarr->index[npyarr->stridedim]++; - - npyarr->curdim++; - npyarr->stridedim += npyarr->inc; - npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); - npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); - npyarr->index[npyarr->stridedim] = 0; - - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; - GET_TC(tc)->itemValue = npyarr->array; - return 1; -} - -static 
JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - char *cStr; - - if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { - const npy_intp idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; - } else { - const npy_intp idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - - return cStr; -} - -//============================================================================= -// Pandas block iteration functions -// -// Serialises a DataFrame column by column to avoid unnecessary data copies and -// more representative serialisation when dealing with mixed dtypes. -// -// Uses a dedicated NpyArrContext for each column. -//============================================================================= - -static void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->transpose) { - blkCtxt->colIdx++; - } else { - blkCtxt->colIdx = 0; - } - - NpyArr_freeItemValue(obj, tc); -} - -static int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->colIdx >= blkCtxt->ncols) { - return 0; - } - - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - blkCtxt->colIdx++; - return NpyArr_iterNextItem(obj, tc); -} - -static char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; - char *cStr; - - if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { - const npy_intp idx = blkCtxt->colIdx - 1; - cStr = npyarr->columnLabels[idx]; - } else { - const npy_intp idx = - GET_TC(tc)->iterNext != PdBlock_iterNext - ? 
npyarr->index[npyarr->stridedim - npyarr->inc] - 1 - : npyarr->index[npyarr->stridedim]; - - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - return cStr; -} - -static char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), - JSONTypeContext *tc, - size_t *outLen) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - char *cStr; - - if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { - const npy_intp idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; - } else { - const npy_intp idx = blkCtxt->colIdx; - cStr = npyarr->rowLabels[idx]; - } - - *outLen = strlen(cStr); - return cStr; -} - -static int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { - return 0; - } - - if (blkCtxt->transpose) { - if (blkCtxt->colIdx >= blkCtxt->ncols) { - return 0; - } - } else { - const NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; - if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { - return 0; - } - } - - ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; - GET_TC(tc)->itemValue = obj; - - return 1; -} - -static void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), - JSONTypeContext *tc) { - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt->transpose) { - // if transposed we exhaust each column before moving to the next - GET_TC(tc)->iterNext = NpyArr_iterNextItem; - GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; - } -} - -static void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj = (PyObject *)_obj; - - GET_TC(tc)->iterGetName = GET_TC(tc)->transpose - ? PdBlock_iterGetName_Transpose - : PdBlock_iterGetName; - - PdBlockContext *blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); - if (!blkCtxt) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - GET_TC(tc)->pdblock = blkCtxt; - - blkCtxt->colIdx = 0; - blkCtxt->transpose = GET_TC(tc)->transpose; - blkCtxt->ncols = get_attr_length(obj, "columns"); - - if (blkCtxt->ncols == 0) { - blkCtxt->npyCtxts = NULL; - - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - blkCtxt->npyCtxts = PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); - if (!blkCtxt->npyCtxts) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - PyObject *arrays = get_sub_attr(obj, "_mgr", "column_arrays"); - if (!arrays) { - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; - } - - for (Py_ssize_t i = 0; i < PyObject_Length(arrays); i++) { - PyObject *array = PyList_GET_ITEM(arrays, i); - if (!array) { - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - goto ARR_RET; - } - - // ensure we have a numpy array (i.e. 
np.asarray) - PyObject *values = PyObject_CallMethod(array, "__array__", NULL); - if ((!values) || (!PyArray_CheckExact(values))) { - // Didn't get a numpy array - ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - goto ARR_RET; - } - - GET_TC(tc)->newObj = values; - - // init a dedicated context for this column - NpyArr_iterBegin(obj, tc); - - GET_TC(tc)->itemValue = NULL; - ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; - - blkCtxt->npyCtxts[i] = GET_TC(tc)->npyarr; - GET_TC(tc)->newObj = NULL; - } - GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; - goto ARR_RET; - -ARR_RET: - Py_DECREF(arrays); -} - -static void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->itemValue = NULL; - NpyArrContext *npyarr = GET_TC(tc)->npyarr; - PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; - - if (blkCtxt) { - for (int i = 0; i < blkCtxt->ncols; i++) { - npyarr = blkCtxt->npyCtxts[i]; - if (npyarr) { - if (npyarr->array) { - Py_DECREF(npyarr->array); - npyarr->array = NULL; - } - - GET_TC(tc)->npyarr = npyarr; - NpyArr_iterEnd(obj, tc); - - blkCtxt->npyCtxts[i] = NULL; - } - } - - if (blkCtxt->npyCtxts) { - PyObject_Free(blkCtxt->npyCtxts); - } - PyObject_Free(blkCtxt); - } -} - -//============================================================================= -// Tuple iteration functions -// itemValue is borrowed reference, no ref counting -//============================================================================= -static void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); - GET_TC(tc)->itemValue = NULL; -} - -static int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { - - if (GET_TC(tc)->index >= GET_TC(tc)->size) { - return 0; - } - - PyObject *item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); - - GET_TC(tc)->itemValue = item; - GET_TC(tc)->index++; - return 1; -} - -static void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc)) {} - -static JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - -//============================================================================= -// Set iteration functions -// itemValue is borrowed reference, no ref counting -//============================================================================= -static void Set_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->itemValue = NULL; - GET_TC(tc)->iterator = PyObject_GetIter(obj); -} - -static int Set_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - PyObject *item = PyIter_Next(GET_TC(tc)->iterator); - - if (item == NULL) { - return 0; - } - - GET_TC(tc)->itemValue = item; - return 1; -} - -static void Set_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - if (GET_TC(tc)->iterator) { - Py_DECREF(GET_TC(tc)->iterator); - GET_TC(tc)->iterator = NULL; - } -} - -static JSOBJ Set_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Set_iterGetName(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - 
-//============================================================================= -// Dir iteration functions -// itemName ref is borrowed from PyObject_Dir (attrList). No refcount -// itemValue ref is from PyObject_GetAttr. Ref counted -//============================================================================= -static void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->attrList = PyObject_Dir(obj); - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); -} - -static void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; - } - - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - - Py_DECREF((PyObject *)GET_TC(tc)->attrList); -} - -static int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { - PyObject *obj = (PyObject *)_obj; - PyObject *itemValue = GET_TC(tc)->itemValue; - PyObject *itemName = GET_TC(tc)->itemName; - - if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { - return 0; - } - - if (itemValue) { - Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = itemValue = NULL; - } - - if (itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = itemName = NULL; - } - - for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { - PyObject *attrName = - PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); - PyObject *attr = PyUnicode_AsUTF8String(attrName); - const char *attrStr = PyBytes_AS_STRING(attr); - - if (attrStr[0] == '_') { - Py_DECREF(attr); - continue; - } - - itemValue = PyObject_GetAttr(obj, attrName); - if (itemValue == NULL) { - PyErr_Clear(); - Py_DECREF(attr); - continue; - } - - if (PyCallable_Check(itemValue)) { - Py_DECREF(itemValue); - Py_DECREF(attr); - continue; - } - - GET_TC(tc)->itemName = itemName; - GET_TC(tc)->itemValue = itemValue; - - itemName = attr; - break; - } - - if (itemName == NULL) { - GET_TC(tc)->index = GET_TC(tc)->size; - GET_TC(tc)->itemValue = NULL; - return 0; - } - - GET_TC(tc)->itemName = itemName; - GET_TC(tc)->itemValue = itemValue; - GET_TC(tc)->index++; - - return 1; -} - -static JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); - return PyBytes_AS_STRING(GET_TC(tc)->itemName); -} - -//============================================================================= -// List iteration functions -// itemValue is borrowed from object (which is list). 
No refcounting -//============================================================================= -static void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); -} - -static int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (GET_TC(tc)->index >= GET_TC(tc)->size) { - return 0; - } - - GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); - GET_TC(tc)->index++; - return 1; -} - -static void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) { -} - -static JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *List_iterGetName(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc), - size_t *Py_UNUSED(outLen)) { - return NULL; -} - -//============================================================================= -// pandas Index iteration functions -//============================================================================= -static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (!GET_TC(tc)->cStr) { - return 0; - } - - const Py_ssize_t index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - GET_TC(tc)->itemValue = get_values(obj); - if (!GET_TC(tc)->itemValue) { - return 0; - } - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -static void Index_iterEnd(JSOBJ Py_UNUSED(obj), - JSONTypeContext *Py_UNUSED(tc)) {} - -static JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// pandas Series iteration functions -//============================================================================= -static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (!GET_TC(tc)->cStr) { - return 0; - } - - const Py_ssize_t index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); - } else if (index == 2) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - GET_TC(tc)->itemValue = get_values(obj); - if (!GET_TC(tc)->itemValue) { - return 0; - } - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -static void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - enc->outputFormat = 
enc->originalOutputFormat; -} - -static JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// pandas DataFrame iteration functions -//============================================================================= -static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - GET_TC(tc)->index = 0; - GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series & index - if (!GET_TC(tc)->cStr) { - PyErr_NoMemory(); - } -} - -static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { - if (!GET_TC(tc)->cStr) { - return 0; - } - - const Py_ssize_t index = GET_TC(tc)->index; - Py_XDECREF(GET_TC(tc)->itemValue); - if (index == 0) { - memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); - } else if (index == 1) { - memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); - GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); - } else if (index == 2) { - memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); - Py_INCREF(obj); - GET_TC(tc)->itemValue = obj; - } else { - return 0; - } - - GET_TC(tc)->index++; - return 1; -} - -static void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - enc->outputFormat = enc->originalOutputFormat; -} - -static JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = strlen(GET_TC(tc)->cStr); - return GET_TC(tc)->cStr; -} - -//============================================================================= -// Dict iteration functions -// itemName might be converted to string (PyObject_Str). Do refCounting -// itemValue is borrowed from object (which is dict). 
No refCounting -//============================================================================= -static void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - GET_TC(tc)->index = 0; -} - -static int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - - if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index, - &GET_TC(tc)->itemName, &GET_TC(tc)->itemValue)) { - return 0; - } - - if (PyUnicode_Check(GET_TC(tc)->itemName)) { - GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); - } else if (!PyBytes_Check(GET_TC(tc)->itemName)) { - GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName); - PyObject *itemNameTmp = GET_TC(tc)->itemName; - GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName); - Py_DECREF(itemNameTmp); - } else { - Py_INCREF(GET_TC(tc)->itemName); - } - return 1; -} - -static void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (GET_TC(tc)->itemName) { - Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; - } - Py_DECREF(GET_TC(tc)->dictObj); -} - -static JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->itemValue; -} - -static char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, - size_t *outLen) { - *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); - return PyBytes_AS_STRING(GET_TC(tc)->itemName); -} - -static void NpyArr_freeLabels(char **labels, npy_intp len) { - if (labels) { - for (npy_intp i = 0; i < len; i++) { - PyObject_Free(labels[i]); - } - PyObject_Free(labels); - } -} - -/* - * Function: NpyArr_encodeLabels - * ----------------------------- - * - * Builds an array of "encoded" labels. - * - * labels: PyArrayObject pointer for labels to be "encoded" - * num: number of labels - * - * "encode" is quoted above because we aren't really doing encoding. - * For historical reasons this function would actually encode the entire - * array into a separate buffer with a separate call to JSON_Encode - * and would leave it to complex pointer manipulation from there to - * unpack values as needed. To keep things simpler and more idiomatic, - * it now just stringifies any input except for datetime values, - * which may need to be represented in various formats. - */ -static char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, - npy_intp num) { - // NOTE this function steals a reference to labels. 
- PyObject *item = NULL; - const NPY_DATETIMEUNIT base = enc->datetimeUnit; - - if (!labels) { - return 0; - } - - if (PyArray_SIZE(labels) < num) { - PyErr_SetString(PyExc_ValueError, - "Label array sizes do not match corresponding data shape"); - Py_DECREF(labels); - return 0; - } - - char **ret = PyObject_Malloc(sizeof(char *) * num); - if (!ret) { - PyErr_NoMemory(); - Py_DECREF(labels); - return 0; - } - - for (npy_intp i = 0; i < num; i++) { - ret[i] = NULL; - } - - const npy_intp stride = PyArray_STRIDE(labels, 0); - char *dataptr = PyArray_DATA(labels); - const int type_num = PyArray_TYPE(labels); - PyArray_Descr *dtype = PyArray_DESCR(labels); - - for (npy_intp i = 0; i < num; i++) { - item = PyArray_GETITEM(labels, dataptr); - if (!item) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - int is_datetimelike = 0; - npy_int64 i8date; - NPY_DATETIMEUNIT dateUnit = NPY_FR_ns; - if (PyTypeNum_ISDATETIME(type_num)) { - is_datetimelike = 1; - i8date = *(npy_int64 *)dataptr; - dateUnit = get_datetime_metadata_from_dtype(dtype).base; - } else if (PyDate_Check(item) || PyDelta_Check(item)) { - is_datetimelike = 1; - if (PyObject_HasAttrString(item, "_value")) { - // pd.Timestamp object or pd.NaT - // see test_date_index_and_values for case with non-nano - i8date = get_long_attr(item, "_value"); - } else { - if (PyDelta_Check(item)) { - i8date = total_seconds(item) * 1000000000LL; // nanoseconds per second - } else { - // datetime.* objects don't follow above rules - i8date = PyDateTimeToEpoch(item, NPY_FR_ns); - } - } - } - - size_t len; - char *cLabel; - if (is_datetimelike) { - if (i8date == get_nat()) { - len = 4; - cLabel = PyObject_Malloc(len + 1); - strncpy(cLabel, "null", len + 1); - } else { - if (enc->datetimeIso) { - if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { - // TODO(username): non-nano timedelta support? - cLabel = int64ToIsoDuration(i8date, &len); - } else { - if (type_num == NPY_DATETIME) { - cLabel = int64ToIso(i8date, dateUnit, base, &len); - } else { - cLabel = PyDateTimeToIso(item, base, &len); - } - } - if (cLabel == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - } else { - int size_of_cLabel = 21; // 21 chars for int 64 - cLabel = PyObject_Malloc(size_of_cLabel); - if (scaleNanosecToUnit(&i8date, base) == -1) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT, i8date); - len = strlen(cLabel); - } - } - } else { // Fallback to string representation - // Replace item with the string to keep it alive. 
- Py_SETREF(item, PyObject_Str(item)); - if (item == NULL) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(item); - len = strlen(cLabel); - } - - // Add 1 to include NULL terminator - ret[i] = PyObject_Malloc(len + 1); - memcpy(ret[i], cLabel, len + 1); - Py_DECREF(item); - - if (is_datetimelike) { - PyObject_Free(cLabel); - } - - if (PyErr_Occurred()) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - if (!ret[i]) { - PyErr_NoMemory(); - ret = 0; - break; - } - - dataptr += stride; - } - - Py_DECREF(labels); - return ret; -} - -static void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) { - PyObject *tmpObj = NULL; - tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL); - if (!PyErr_Occurred()) { - if (tmpObj == NULL) { - PyErr_SetString(PyExc_TypeError, "Failed to execute default handler"); - } else { - encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0); - } - } - Py_XDECREF(tmpObj); - return; -} - -static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { - tc->prv = NULL; - - if (!_obj) { - tc->type = JT_INVALID; - return; - } - - PyObject *obj = (PyObject *)_obj; - PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; - - if (PyBool_Check(obj)) { - tc->type = (obj == Py_True) ? JT_TRUE : JT_FALSE; - return; - } else if (obj == Py_None) { - tc->type = JT_NULL; - return; - } - - TypeContext *pc = createTypeContext(); - if (!pc) { - tc->type = JT_INVALID; - return; - } - tc->prv = pc; - - if (PyTypeNum_ISDATETIME(enc->npyType)) { - int64_t longVal = *(npy_int64 *)enc->npyValue; - if (longVal == get_nat()) { - tc->type = JT_NULL; - } else { - if (enc->datetimeIso) { - if (enc->npyType == NPY_TIMEDELTA) { - pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; - } else { - pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; - } - // Currently no way to pass longVal to iso function, so use - // state management - pc->longValue = longVal; - tc->type = JT_UTF8; - } else { - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - if (scaleNanosecToUnit(&longVal, base) == -1) { - goto INVALID; - } - pc->longValue = longVal; - tc->type = JT_LONG; - } - } - - // TODO(username): this prevents infinite loop with - // mixed-type DataFrames; - // refactor - enc->npyCtxtPassthru = NULL; - enc->npyType = -1; - return; - } - - if (PyIter_Check(obj) || (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) { - goto ISITERABLE; - } - - if (PyLong_Check(obj)) { - tc->type = JT_LONG; - int overflow = 0; - pc->longValue = PyLong_AsLongLongAndOverflow(obj, &overflow); - int err; - err = (pc->longValue == -1) && PyErr_Occurred(); - - if (overflow) { - tc->type = JT_BIGNUM; - } else if (err) { - goto INVALID; - } - - return; - } else if (PyFloat_Check(obj)) { - const double val = PyFloat_AS_DOUBLE(obj); - if (npy_isnan(val) || npy_isinf(val)) { - tc->type = JT_NULL; - } else { - pc->doubleValue = val; - tc->type = JT_DOUBLE; - } - return; - } else if (PyBytes_Check(obj)) { - pc->PyTypeToUTF8 = PyBytesToUTF8; - tc->type = JT_UTF8; - return; - } else if (PyUnicode_Check(obj)) { - pc->PyTypeToUTF8 = PyUnicodeToUTF8; - tc->type = JT_UTF8; - return; - } else if (object_is_decimal_type(obj)) { - pc->doubleValue = PyFloat_AsDouble(obj); - tc->type = JT_DOUBLE; - return; - } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { - if (object_is_nat_type(obj)) { - tc->type = JT_NULL; - return; - } - - if (enc->datetimeIso) { - pc->PyTypeToUTF8 = PyDateTimeToIsoCallback; - tc->type = JT_UTF8; - } else { - 
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - pc->longValue = PyDateTimeToEpoch(obj, base); - tc->type = JT_LONG; - } - return; - } else if (PyTime_Check(obj)) { - pc->PyTypeToUTF8 = PyTimeToJSON; - tc->type = JT_UTF8; - return; - } else if (PyArray_IsScalar(obj, Datetime)) { - npy_int64 longVal; - if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) { - tc->type = JT_NULL; - return; - } - PyArray_Descr *dtype = PyArray_DescrFromScalar(obj); - if (!PyTypeNum_ISDATETIME(dtype->type_num)) { - PyErr_Format(PyExc_ValueError, "Could not get resolution of datetime"); - return; - } - - PyArray_Descr *outcode = PyArray_DescrFromType(NPY_INT64); - PyArray_CastScalarToCtype(obj, &longVal, outcode); - Py_DECREF(outcode); - - if (enc->datetimeIso) { - GET_TC(tc)->longValue = longVal; - pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback; - enc->valueUnit = get_datetime_metadata_from_dtype(dtype).base; - tc->type = JT_UTF8; - } else { - NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - pc->longValue = PyDateTimeToEpoch(obj, base); - tc->type = JT_LONG; - } - return; - } else if (PyDelta_Check(obj)) { - npy_int64 value = - PyObject_HasAttrString(obj, "_value") ? get_long_attr(obj, "_value") - : // pd.Timedelta object or pd.NaT - total_seconds(obj) * 1000000000LL; // nanoseconds per sec - - if (value == get_nat()) { - tc->type = JT_NULL; - return; - } else if (enc->datetimeIso) { - pc->PyTypeToUTF8 = NpyTimeDeltaToIsoCallback; - tc->type = JT_UTF8; - } else { - const int unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - if (scaleNanosecToUnit(&value, unit) != 0) { - // TODO(username): Add some kind of error handling here - } - - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_OverflowError)) { - goto INVALID; - } - - tc->type = JT_LONG; - } - pc->longValue = value; - return; - } else if (PyArray_IsScalar(obj, Integer)) { - tc->type = JT_LONG; - PyArray_CastScalarToCtype(obj, &(pc->longValue), - PyArray_DescrFromType(NPY_INT64)); - - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_OverflowError)) { - goto INVALID; - } - - return; - } else if (PyArray_IsScalar(obj, Bool)) { - PyArray_CastScalarToCtype(obj, &(pc->longValue), - PyArray_DescrFromType(NPY_BOOL)); - tc->type = (pc->longValue) ? 
JT_TRUE : JT_FALSE; - return; - } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) { - PyArray_CastScalarToCtype(obj, &(pc->doubleValue), - PyArray_DescrFromType(NPY_DOUBLE)); - tc->type = JT_DOUBLE; - return; - } else if (PyArray_CheckScalar(obj)) { - PyErr_Format(PyExc_TypeError, - "%R (numpy-scalar) is not JSON serializable at the moment", - obj); - goto INVALID; - } else if (object_is_na_type(obj)) { - tc->type = JT_NULL; - return; - } - -ISITERABLE: - - if (object_is_index_type(obj)) { - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = Index_iterBegin; - pc->iterEnd = Index_iterEnd; - pc->iterNext = Index_iterNext; - pc->iterGetValue = Index_iterGetValue; - pc->iterGetName = Index_iterGetName; - return; - } - - pc->newObj = get_values(obj); - if (pc->newObj) { - tc->type = JT_ARRAY; - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - } else { - goto INVALID; - } - - return; - } else if (object_is_series_type(obj)) { - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = Series_iterBegin; - pc->iterEnd = Series_iterEnd; - pc->iterNext = Series_iterNext; - pc->iterGetValue = Series_iterGetValue; - pc->iterGetName = Series_iterGetName; - return; - } - - pc->newObj = get_values(obj); - if (!pc->newObj) { - goto INVALID; - } - - if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { - tc->type = JT_OBJECT; - PyObject *tmpObj = PyObject_GetAttrString(obj, "index"); - if (!tmpObj) { - goto INVALID; - } - PyObject *values = get_values(tmpObj); - Py_DECREF(tmpObj); - if (!values) { - goto INVALID; - } - pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - if (!pc->columnLabels) { - goto INVALID; - } - } else { - tc->type = JT_ARRAY; - } - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; - } else if (PyArray_Check(obj)) { - if (enc->npyCtxtPassthru) { - pc->npyarr = enc->npyCtxtPassthru; - tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY); - - pc->iterBegin = NpyArrPassThru_iterBegin; - pc->iterNext = NpyArr_iterNext; - pc->iterEnd = NpyArrPassThru_iterEnd; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - - enc->npyCtxtPassthru = NULL; - return; - } - - tc->type = JT_ARRAY; - pc->iterBegin = NpyArr_iterBegin; - pc->iterEnd = NpyArr_iterEnd; - pc->iterNext = NpyArr_iterNext; - pc->iterGetValue = NpyArr_iterGetValue; - pc->iterGetName = NpyArr_iterGetName; - return; - } else if (object_is_dataframe_type(obj)) { - if (enc->blkCtxtPassthru) { - pc->pdblock = enc->blkCtxtPassthru; - tc->type = - (pc->pdblock->npyCtxts[0]->columnLabels ? 
JT_OBJECT : JT_ARRAY); - - pc->iterBegin = PdBlockPassThru_iterBegin; - pc->iterEnd = PdBlockPassThru_iterEnd; - pc->iterNext = PdBlock_iterNextItem; - pc->iterGetName = PdBlock_iterGetName; - pc->iterGetValue = NpyArr_iterGetValue; - - enc->blkCtxtPassthru = NULL; - return; - } - - if (enc->outputFormat == SPLIT) { - tc->type = JT_OBJECT; - pc->iterBegin = DataFrame_iterBegin; - pc->iterEnd = DataFrame_iterEnd; - pc->iterNext = DataFrame_iterNext; - pc->iterGetValue = DataFrame_iterGetValue; - pc->iterGetName = DataFrame_iterGetName; - return; - } - - pc->iterBegin = PdBlock_iterBegin; - pc->iterEnd = PdBlock_iterEnd; - pc->iterNext = PdBlock_iterNext; - pc->iterGetName = PdBlock_iterGetName; - pc->iterGetValue = NpyArr_iterGetValue; - - if (enc->outputFormat == VALUES) { - tc->type = JT_ARRAY; - } else if (enc->outputFormat == RECORDS) { - tc->type = JT_ARRAY; - PyObject *tmpObj = PyObject_GetAttrString(obj, "columns"); - if (!tmpObj) { - goto INVALID; - } - PyObject *values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - goto INVALID; - } - pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - Py_DECREF(tmpObj); - if (!pc->columnLabels) { - goto INVALID; - } - } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) { - tc->type = JT_OBJECT; - PyObject *tmpObj = - (enc->outputFormat == INDEX ? PyObject_GetAttrString(obj, "index") - : PyObject_GetAttrString(obj, "columns")); - if (!tmpObj) { - goto INVALID; - } - PyObject *values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - goto INVALID; - } - pc->rowLabelsLen = PyObject_Size(tmpObj); - pc->rowLabels = - NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->rowLabelsLen); - Py_DECREF(tmpObj); - tmpObj = - (enc->outputFormat == INDEX ? 
PyObject_GetAttrString(obj, "columns") - : PyObject_GetAttrString(obj, "index")); - if (!tmpObj) { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - values = get_values(tmpObj); - if (!values) { - Py_DECREF(tmpObj); - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, - pc->columnLabelsLen); - Py_DECREF(tmpObj); - if (!pc->columnLabels) { - NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen); - pc->rowLabels = NULL; - goto INVALID; - } - - if (enc->outputFormat == COLUMNS) { - pc->transpose = 1; - } - } else { - goto INVALID; - } - return; - } else if (PyDict_Check(obj)) { - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = obj; - Py_INCREF(obj); - - return; - } else if (PyList_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = List_iterBegin; - pc->iterEnd = List_iterEnd; - pc->iterNext = List_iterNext; - pc->iterGetValue = List_iterGetValue; - pc->iterGetName = List_iterGetName; - return; - } else if (PyTuple_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = Tuple_iterBegin; - pc->iterEnd = Tuple_iterEnd; - pc->iterNext = Tuple_iterNext; - pc->iterGetValue = Tuple_iterGetValue; - pc->iterGetName = Tuple_iterGetName; - return; - } else if (PyAnySet_Check(obj)) { - tc->type = JT_ARRAY; - pc->iterBegin = Set_iterBegin; - pc->iterEnd = Set_iterEnd; - pc->iterNext = Set_iterNext; - pc->iterGetValue = Set_iterGetValue; - pc->iterGetName = Set_iterGetName; - return; - } - - PyObject *toDictFunc = PyObject_GetAttrString(obj, "toDict"); - - if (toDictFunc) { - PyObject *tuple = PyTuple_New(0); - PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL); - Py_DECREF(tuple); - Py_DECREF(toDictFunc); - - if (toDictResult == NULL) { - PyErr_Clear(); - tc->type = JT_NULL; - return; - } - - if (!PyDict_Check(toDictResult)) { - Py_DECREF(toDictResult); - tc->type = JT_NULL; - return; - } - - tc->type = JT_OBJECT; - pc->iterBegin = Dict_iterBegin; - pc->iterEnd = Dict_iterEnd; - pc->iterNext = Dict_iterNext; - pc->iterGetValue = Dict_iterGetValue; - pc->iterGetName = Dict_iterGetName; - pc->dictObj = toDictResult; - return; - } - - PyErr_Clear(); - - if (enc->defaultHandler) { - Object_invokeDefaultHandler(obj, enc); - goto INVALID; - } - - tc->type = JT_OBJECT; - pc->iterBegin = Dir_iterBegin; - pc->iterEnd = Dir_iterEnd; - pc->iterNext = Dir_iterNext; - pc->iterGetValue = Dir_iterGetValue; - pc->iterGetName = Dir_iterGetName; - return; - -INVALID: - tc->type = JT_INVALID; - PyObject_Free(tc->prv); - tc->prv = NULL; - return; -} - -static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - if (tc->prv) { - Py_XDECREF(GET_TC(tc)->newObj); - GET_TC(tc)->newObj = NULL; - NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen); - GET_TC(tc)->rowLabels = NULL; - NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen); - GET_TC(tc)->columnLabels = NULL; - PyObject_Free(GET_TC(tc)->cStr); - GET_TC(tc)->cStr = NULL; - PyObject_Free(tc->prv); - tc->prv = NULL; - } -} - -static const char *Object_getStringValue(JSOBJ obj, JSONTypeContext *tc, - size_t *_outLen) { - return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen); -} - -static JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), 
JSONTypeContext *tc) { - return GET_TC(tc)->longValue; -} - -static double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { - return GET_TC(tc)->doubleValue; -} - -static const char *Object_getBigNumStringValue(JSOBJ obj, JSONTypeContext *tc, - size_t *_outLen) { - PyObject *repr = PyObject_Str(obj); - const char *str = PyUnicode_AsUTF8AndSize(repr, (Py_ssize_t *)_outLen); - char *bytes = PyObject_Malloc(*_outLen + 1); - memcpy(bytes, str, *_outLen + 1); - GET_TC(tc)->cStr = bytes; - - Py_DECREF(repr); - - return GET_TC(tc)->cStr; -} - -static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); } - -static void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterBegin(obj, tc); -} - -static int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterNext(obj, tc); -} - -static void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) { - GET_TC(tc)->iterEnd(obj, tc); -} - -static JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { - return GET_TC(tc)->iterGetValue(obj, tc); -} - -static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, - size_t *outLen) { - return GET_TC(tc)->iterGetName(obj, tc, outLen); -} - -PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, - PyObject *kwargs) { - PyDateTime_IMPORT; - if (PyDateTimeAPI == NULL) { - return NULL; - } - - PandasDateTime_IMPORT; - if (PandasDateTimeAPI == NULL) { - return NULL; - } - - static char *kwlist[] = {"obj", - "ensure_ascii", - "double_precision", - "encode_html_chars", - "orient", - "date_unit", - "iso_dates", - "default_handler", - "indent", - NULL}; - - PyObject *oinput = NULL; - PyObject *oensureAscii = NULL; - int idoublePrecision = 10; // default double precision setting - PyObject *oencodeHTMLChars = NULL; - char *sOrient = NULL; - char *sdateFormat = NULL; - PyObject *oisoDates = 0; - PyObject *odefHandler = 0; - int indent = 0; - - PyObjectEncoder pyEncoder = {{ - Object_beginTypeContext, - Object_endTypeContext, - Object_getStringValue, - Object_getLongValue, - NULL, // getIntValue is unused - Object_getDoubleValue, - Object_getBigNumStringValue, - Object_iterBegin, - Object_iterNext, - Object_iterEnd, - Object_iterGetValue, - Object_iterGetName, - Object_releaseObject, - PyObject_Malloc, - PyObject_Realloc, - PyObject_Free, - -1, // recursionMax - idoublePrecision, - 1, // forceAscii - 0, // encodeHTMLChars - indent, // indent - }}; - JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; - - pyEncoder.npyCtxtPassthru = NULL; - pyEncoder.blkCtxtPassthru = NULL; - pyEncoder.npyType = -1; - pyEncoder.npyValue = NULL; - pyEncoder.datetimeIso = 0; - pyEncoder.datetimeUnit = NPY_FR_ms; - pyEncoder.outputFormat = COLUMNS; - pyEncoder.defaultHandler = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, &oinput, - &oensureAscii, &idoublePrecision, - &oencodeHTMLChars, &sOrient, &sdateFormat, - &oisoDates, &odefHandler, &indent)) { - return NULL; - } - - if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) { - encoder->forceASCII = 0; - } - - if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) { - encoder->encodeHTMLChars = 1; - } - - if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) { - PyErr_Format( - PyExc_ValueError, - "Invalid value '%d' for option 'double_precision', max is '%u'", - idoublePrecision, JSON_DOUBLE_MAX_DECIMALS); - return NULL; - } - encoder->doublePrecision = idoublePrecision; - - if (sOrient != NULL) { - if (strcmp(sOrient, "records") == 0) { 
- pyEncoder.outputFormat = RECORDS; - } else if (strcmp(sOrient, "index") == 0) { - pyEncoder.outputFormat = INDEX; - } else if (strcmp(sOrient, "split") == 0) { - pyEncoder.outputFormat = SPLIT; - } else if (strcmp(sOrient, "values") == 0) { - pyEncoder.outputFormat = VALUES; - } else if (strcmp(sOrient, "columns") != 0) { - PyErr_Format(PyExc_ValueError, "Invalid value '%s' for option 'orient'", - sOrient); - return NULL; - } - } - - if (sdateFormat != NULL) { - if (strcmp(sdateFormat, "s") == 0) { - pyEncoder.datetimeUnit = NPY_FR_s; - } else if (strcmp(sdateFormat, "ms") == 0) { - pyEncoder.datetimeUnit = NPY_FR_ms; - } else if (strcmp(sdateFormat, "us") == 0) { - pyEncoder.datetimeUnit = NPY_FR_us; - } else if (strcmp(sdateFormat, "ns") == 0) { - pyEncoder.datetimeUnit = NPY_FR_ns; - } else { - PyErr_Format(PyExc_ValueError, - "Invalid value '%s' for option 'date_unit'", sdateFormat); - return NULL; - } - } - - if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) { - pyEncoder.datetimeIso = 1; - } - - if (odefHandler != NULL && odefHandler != Py_None) { - if (!PyCallable_Check(odefHandler)) { - PyErr_SetString(PyExc_TypeError, "Default handler is not callable"); - return NULL; - } - pyEncoder.defaultHandler = odefHandler; - } - - encoder->indent = indent; - - pyEncoder.originalOutputFormat = pyEncoder.outputFormat; - - char buffer[65536]; - char *ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); - if (PyErr_Occurred()) { - return NULL; - } - - if (encoder->errorMsg) { - if (ret != buffer) { - encoder->free(ret); - } - PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg); - return NULL; - } - - PyObject *newobj = PyUnicode_FromString(ret); - - if (ret != buffer) { - encoder->free(ret); - } - - return newobj; -} From 6c871fcedacc46a3f16fb450d18d4e05f9548019 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 14:27:29 -0800 Subject: [PATCH 05/14] comment --- pandas/_libs/hashtable_cpp.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/hashtable_cpp.pyx b/pandas/_libs/hashtable_cpp.pyx index f25a3274de8b5..3ad1a3b137dd3 100644 --- a/pandas/_libs/hashtable_cpp.pyx +++ b/pandas/_libs/hashtable_cpp.pyx @@ -62,6 +62,7 @@ def unique_label_indices(const cnp.npy_intp[:] labels) -> cnp.ndarray: idx.push_back(i) # TODO: must be a cleaner way to do this? 
+ # even arr.data = move(idx.data()) would be better but arr.data is readonly arr = np.empty(idx.size(), dtype=np.intp) memcpy(arr.data, idx.const_data(), idx.size() * sizeof(cnp.npy_intp)) arr = arr[np.asarray(labels)[arr].argsort()] From f33e23a00405595e4e28135f3ef5c27b589cf3b5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 9 Dec 2023 23:29:44 -0500 Subject: [PATCH 06/14] add khash dep --- pandas/_libs/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 1a4b2553526af..bf2b2ee8aafcf 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -127,6 +127,7 @@ py.extension_module( 'hashtable_cpp', ['hashtable_cpp.pyx'], include_directories: [inc_np, inc_pd], + dependencies: _khash_primitive_helper_dep, subdir: 'pandas/_libs', override_options : ['cython_language=cpp'], install: true From 1945abd444bb3626dc1ce820c958bfeb267c1107 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 10 Dec 2023 01:29:24 -0500 Subject: [PATCH 07/14] add cython args --- pandas/_libs/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index bf2b2ee8aafcf..5bf67bb03a0a9 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -126,6 +126,7 @@ endforeach py.extension_module( 'hashtable_cpp', ['hashtable_cpp.pyx'], + cython_args: cython_args, include_directories: [inc_np, inc_pd], dependencies: _khash_primitive_helper_dep, subdir: 'pandas/_libs', override_options : ['cython_language=cpp'], install: true From a04808f740912950367625e65bfab4040c1a376b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 10 Dec 2023 03:11:37 -0500 Subject: [PATCH 08/14] remove cimport from pxd --- pandas/_libs/hashtable_cpp.pyx | 10 +++++++--- pandas/_libs/meson.build | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/hashtable_cpp.pyx b/pandas/_libs/hashtable_cpp.pyx index 3ad1a3b137dd3..c1d3412d23f99 100644 --- a/pandas/_libs/hashtable_cpp.pyx +++ b/pandas/_libs/hashtable_cpp.pyx @@ -6,14 +6,18 @@ from libc.stdint cimport uint32_t from libc.string cimport memcpy from libcpp.vector cimport vector -from pandas._libs.khash cimport kh_needed_n_buckets - cdef extern from "<functional>" namespace "std" nogil: cdef cppclass hash[T]: hash() size_t operator() +# TODO: duplicated with khash.pxd +cdef extern from "pandas/vendored/klib/khash_python.h": + ctypedef uint32_t khuint_t + khuint_t kh_needed_n_buckets(khuint_t element_n) nogil + + cdef extern from "pandas/vendored/klib/cpp/khash.hpp" namespace "klib" nogil: cdef cppclass KHash[T, Hash, Eq=*, khint_t=*]: T *keys @@ -31,7 +35,7 @@ cdef extern from "pandas/vendored/klib/cpp/khash.hpp" namespace "klib" nogil: # TODO: de-duplicate from hashtable.pyx -cdef uint32_t SIZE_HINT_LIMIT = (1 << 20) + 7 +cdef khuint_t SIZE_HINT_LIMIT = (1 << 20) + 7 @cython.wraparound(False) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 5bf67bb03a0a9..1514348e14182 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -128,7 +128,6 @@ py.extension_module( ['hashtable_cpp.pyx'], cython_args: cython_args, include_directories: [inc_np, inc_pd], - dependencies: _khash_primitive_helper_dep, subdir: 'pandas/_libs', override_options : ['cython_language=cpp'], install: true From d1cc45b4c3e393f5e66585358f0daadde889916a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 10 Dec 2023 03:37:04 -0500 Subject: [PATCH 09/14] verbose meson --- .github/workflows/unit-tests.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index ffcd2ae32c09c..fcef39477cb1e 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -254,7 +254,8 @@ jobs: python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 - python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" + python -m pip install --no-cache-dir --no-build-isolation -e . \ + --config-settings=setup-args="--werror" --config-settings editable-verbose=true python -m pip list --no-cache-dir export PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml @@ -292,7 +293,8 @@ jobs: . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 - python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror" + python -m pip install --no-cache-dir --no-build-isolation -e . \ + --config-settings=setup-args="--werror" --config-settings editable-verbose=true python -m pip list --no-cache-dir - name: Run Tests @@ -365,7 +367,8 @@ jobs: python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov - python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror" + python -m pip install -ve . --no-build-isolation --no-index --no-deps \ + --config-settings=setup-args="--werror" --config-settings editable-verbose=true python -m pip list - name: Run Tests From 7ef64991c32b67881cf557c557c3d0c8a105cdc6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 10 Dec 2023 22:57:10 -0500 Subject: [PATCH 10/14] different verbose --- .github/workflows/unit-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index fcef39477cb1e..824c934f7b5c6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -255,7 +255,7 @@ jobs: python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . 
\ - --config-settings=setup-args="--werror" --config-settings editable-verbose=true + --config-settings=setup-args="--werror" --config-settings compile-args="--verbose" python -m pip list --no-cache-dir export PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml @@ -294,7 +294,7 @@ jobs: python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . \ - --config-settings=setup-args="--werror" --config-settings editable-verbose=true + --config-settings=setup-args="--werror" --config-settings compile-args="--verbose" python -m pip list --no-cache-dir - name: Run Tests @@ -368,7 +368,7 @@ jobs: python -m pip install versioneer[toml] python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov python -m pip install -ve . --no-build-isolation --no-index --no-deps \ - --config-settings=setup-args="--werror" --config-settings editable-verbose=true + --config-settings=setup-args="--werror" --config-settings compile-args="--verbose" python -m pip list - name: Run Tests From 66de39c4fbc462b035c628a434c5fe82b1e7f80d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 12 Dec 2023 20:43:57 -0500 Subject: [PATCH 11/14] more build changes --- .github/actions/build_pandas/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 460ae2f8594c0..54ac1ec5817fb 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -26,9 +26,9 @@ runs: run: | if [[ ${{ inputs.editable }} == "true" ]]; then pip install -e . --no-build-isolation -v --no-deps \ - --config-settings=setup-args="--werror" + --config-settings=setup-args="--werror" --config-settings compile-args="--verbose" else pip install . 
--no-build-isolation -v --no-deps \ - --config-settings=setup-args="--werror" + --config-settings=setup-args="--werror" --config-settings compile-args="--verbose" fi shell: bash -el {0} From 87096441a49fe422759a08a8c7cd5f809b0cf07a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Dec 2023 18:42:17 -0500 Subject: [PATCH 12/14] Add -ffunction-sections argument --- pandas/_libs/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 1514348e14182..1fa2332645fe6 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -126,6 +126,7 @@ endforeach py.extension_module( 'hashtable_cpp', ['hashtable_cpp.pyx'], + cpp_args: '-ffunction-sections', cython_args: cython_args, include_directories: [inc_np, inc_pd], subdir: 'pandas/_libs', From 1f97b0144ea3ebadd2e530dd4466441707a88dda Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Dec 2023 18:53:41 -0500 Subject: [PATCH 13/14] Add fvisibility=hidden --- pandas/_libs/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 1fa2332645fe6..2b2e41d78f7e0 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -126,7 +126,7 @@ endforeach py.extension_module( 'hashtable_cpp', ['hashtable_cpp.pyx'], - cpp_args: '-ffunction-sections', + cpp_args: ['-ffunction-sections', '-fvisibility=hidden'], cython_args: cython_args, include_directories: [inc_np, inc_pd], subdir: 'pandas/_libs', From 1e7d9e82cbdb5adb37686d87db9e0c5706016b24 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Dec 2023 22:41:27 -0500 Subject: [PATCH 14/14] gc-sections linker arg --- pandas/_libs/meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 2b2e41d78f7e0..ed98e81b2a7bd 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -126,7 +126,8 @@ endforeach py.extension_module( 'hashtable_cpp', ['hashtable_cpp.pyx'], - cpp_args: ['-ffunction-sections', '-fvisibility=hidden'], + cpp_args: '-ffunction-sections', + link_args: '-Wl,--gc-sections', cython_args: cython_args, include_directories: [inc_np, inc_pd], subdir: 'pandas/_libs',
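[editor's note — illustrative sketch, not part of the patch series] The last three patches combine -ffunction-sections, -fvisibility=hidden (dropped again in the final patch), and -Wl,--gc-sections. What that pairing buys: the compiler emits each function into its own .text.<name> section, and the GNU linker can then garbage-collect any section nothing references, trimming the extension module. A minimal standalone C demo, assuming gcc/clang with GNU ld (Mach-O linkers use -dead_strip instead):

/* dead_code.c -- hypothetical demo, not pandas code.
 * Build: cc -ffunction-sections -c dead_code.c
 *        cc -Wl,--gc-sections dead_code.o -o demo
 * never_called() lands in its own section .text.never_called, which
 * --gc-sections drops because nothing references it; `nm demo`
 * should no longer list the symbol. */
#include <stdio.h>

void never_called(void) { puts("the linker should discard me"); }

int main(void) {
  puts("only referenced sections survive --gc-sections");
  return 0;
}
[end editor's note]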