Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: Move some Tempita to Cython/C++ #56432

Closed
wants to merge 16 commits into from
Closed
9 changes: 6 additions & 3 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ jobs:
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
# Version specifiers must be quoted: an unquoted ">=" is parsed by the shell as
# an output redirection, which silently drops the pin and hides pip's output.
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz "pytest>=7.3.2" "pytest-xdist>=2.2.0" "hypothesis>=6.46.1"
python -m pip install --no-cache-dir --no-build-isolation -e . \
--config-settings=setup-args="--werror" --config-settings=compile-args="--verbose"
python -m pip list --no-cache-dir
export PANDAS_CI=1
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
Expand Down Expand Up @@ -310,7 +311,8 @@ jobs:
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
# Version specifiers must be quoted: an unquoted ">=" is parsed by the shell as
# an output redirection, which silently drops the pin and hides pip's output.
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz "pytest>=7.3.2" "pytest-xdist>=2.2.0" "hypothesis>=6.46.1"
python -m pip install --no-cache-dir --no-build-isolation -e . \
--config-settings=setup-args="--werror" --config-settings=compile-args="--verbose"
python -m pip list --no-cache-dir

- name: Run Tests
Expand Down Expand Up @@ -383,7 +385,8 @@ jobs:
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
# Version specifiers must be quoted: an unquoted ">=" is parsed by the shell as
# an output redirection, which silently drops the pin and hides pip's output.
python -m pip install python-dateutil pytz tzdata cython "hypothesis>=6.46.1" "pytest>=7.3.2" "pytest-xdist>=2.2.0" pytest-cov
python -m pip install -ve . --no-build-isolation --no-index --no-deps \
--config-settings=setup-args="--werror" --config-settings=compile-args="--verbose"
python -m pip list

- name: Run Tests
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,8 @@ include "hashtable_func_helper.pxi"
# map derived hash-map types onto basic hash-map types:
# np.intp has a platform-dependent width, so the intp-flavoured names are bound
# to whichever fixed-width implementation has the same dtype. Any other width
# is unsupported and raises ValueError carrying the offending dtype.
if np.dtype(np.intp) == np.dtype(np.int64):
IntpHashTable = Int64HashTable
unique_label_indices = _unique_label_indices_int64
elif np.dtype(np.intp) == np.dtype(np.int32):
IntpHashTable = Int32HashTable
unique_label_indices = _unique_label_indices_int32
else:
raise ValueError(np.dtype(np.intp))

Expand Down
74 changes: 74 additions & 0 deletions pandas/_libs/hashtable_cpp.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import cython
import numpy as np

cimport numpy as cnp
from libc.stdint cimport uint32_t
from libc.string cimport memcpy
from libcpp.vector cimport vector


cdef extern from "<functional>" namespace "std" nogil:
    # Minimal declaration of std::hash<T>; in this module it is only used as
    # the `Hash` template argument of KHash.
    cdef cppclass hash[T]:
        hash()
        # The call operator takes the key to hash; without the parameter list
        # the line does not declare a callable.
        size_t operator()(const T&)

# TODO: duplicated with khash.pxd
# Only the pieces needed to pre-size the table are pulled in here:
# khuint_t (declared as a 32-bit unsigned integer) and kh_needed_n_buckets,
# which unique_label_indices passes the element count to before calling
# KHash.resize.
cdef extern from "pandas/vendored/klib/khash_python.h":
ctypedef uint32_t khuint_t
khuint_t kh_needed_n_buckets(khuint_t element_n) nogil


cdef extern from "pandas/vendored/klib/cpp/khash.hpp" namespace "klib" nogil:
    # Cython-side view of the klib::KHash class template (an open-addressing
    # hash set of keys of type T). Only the members used from Cython are
    # declared.
    cdef cppclass KHash[T, Hash, Eq=*, khint_t=*]:
        T *keys
        KHash()
        # TODO: validate we don't need destructor
        # ~KHash()
        # True when bucket x holds a live key (neither empty nor deleted).
        bint exist(khint_t x)
        T &at(khint_t x)
        khint_t get(const T &)
        # TODO: make this khint_t
        # int resize(khint_t)
        int resize(uint32_t)
        # Inserts the key; the outcome is reported through the int pointer
        # (negative on allocation failure, 0 if the key was already present,
        # positive if it was inserted).
        khint_t put(const T &, int *)
        # void del(khint_t x)


# TODO: de-duplicate from hashtable.pyx
# Upper bound on the bucket count requested when pre-sizing a table; see the
# min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT) call in unique_label_indices.
cdef khuint_t SIZE_HINT_LIMIT = (1 << 20) + 7


@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(const cnp.npy_intp[:] labels) -> cnp.ndarray:
    """
    Indices of the first occurrences of the unique labels
    *excluding* -1. equivalent to:
        np.unique(labels, return_index=True)[1]

    Parameters
    ----------
    labels : memoryview of intp
        Label codes; -1 is dropped from the result.

    Returns
    -------
    np.ndarray[intp]
        Positions of first occurrences, ordered by label value.

    Raises
    ------
    MemoryError
        If the hash table cannot allocate its buckets.
    """
    cdef:
        int ret = 0
        Py_ssize_t i, n = len(labels)
        KHash[cnp.npy_intp, hash[cnp.npy_intp]] *table = (
            new KHash[cnp.npy_intp, hash[cnp.npy_intp]]()
        )
        cnp.ndarray[cnp.npy_intp, ndim=1] arr
        vector[cnp.npy_intp] idx = vector[cnp.npy_intp]()

    # The table is heap-allocated with `new`, so it must be released on every
    # exit path, including exceptions raised while the vector grows.
    try:
        if table.resize(min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) < 0:
            raise MemoryError()

        with nogil:
            for i in range(n):
                table.put(labels[i], &ret)
                if ret < 0:
                    # allocation failure inside put(); reported after the loop
                    # because raising requires the GIL
                    break
                if ret != 0:
                    # TODO: pandas has a custom resize operation but we
                    # rely on C++ stdlib here - how different are they?
                    idx.push_back(i)
    finally:
        del table

    if ret < 0:
        raise MemoryError()

    # TODO: must be a cleaner way to do this?
    # even arr.data = move(idx.data()) would be better but arr.data is readonly
    arr = np.empty(idx.size(), dtype=np.intp)
    if idx.size() > 0:
        # an empty vector may expose a null data pointer; skip the copy then
        memcpy(arr.data, idx.const_data(), idx.size() * sizeof(cnp.npy_intp))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The data member in Cython is read-only and I wasn't sure of any ndarray constructor that would properly manage the lifecycle of a raw data buffer. cc @jbrockmendel in case you know of a better way to do this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would be the advantage of using a vector here over just putting things into the ndarray and resizing it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think @seberg is the person to ask about this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lithomas1 maybe you could do without it, although this was trying to stay a faithful port of the current codebase which creates a custom templated Vector class

std::vector also has the advantage of working out of the box and using RAII; if you were to do this with a raw buffer it takes a few more steps and requires manual memory management, along with diving into ndarray internals

arr = arr[np.asarray(labels)[arr].argsort()]

return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
48 changes: 0 additions & 48 deletions pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -445,51 +445,3 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None):
res_mask = np.zeros(j+1, dtype=np.bool_)
res_mask[j] = True
return modes[:j + 1], res_mask


{{py:

# One function is generated per row below; the row's fields are substituted
# into the function name, the khash type suffix, the vector class and the
# C element type.
# name, dtype, ttype, c_type
dtypes = [('Int64', 'int64', 'int64', 'int64_t'),
('Int32', 'int32', 'int32', 'int32_t'), ]

}}

{{for name, dtype, ttype, c_type in dtypes}}


@cython.wraparound(False)
@cython.boundscheck(False)
def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
"""
Indices of the first occurrences of the unique labels
*excluding* -1. equivalent to:
np.unique(labels, return_index=True)[1]
"""
cdef:
int ret = 0
Py_ssize_t i, n = len(labels)
kh_{{ttype}}_t *table = kh_init_{{ttype}}()
{{name}}Vector idx = {{name}}Vector()
ndarray[{{c_type}}, ndim=1] arr
{{name}}VectorData *ud = idx.data

# Pre-size the table from the input length, capped at SIZE_HINT_LIMIT.
kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT))

with nogil:
for i in range(n):
kh_put_{{ttype}}(table, labels[i], &ret)
# A non-zero ret is treated as "label not seen before" (presumably the
# khash put convention -- confirm against khash.h), so only the first
# position of each label is recorded.
if ret != 0:
# Growing the vector goes through a Python-level method, hence the
# temporary re-acquisition of the GIL.
if needs_resize(ud):
with gil:
idx.resize()
append_data_{{ttype}}(ud, i)

# The table is only needed for de-duplication; release it before building
# the result.
kh_destroy_{{ttype}}(table)

arr = idx.to_array()
# Reorder the collected positions by the label value found at each one.
arr = arr[np.asarray(labels)[arr].argsort()]

# After sorting, a -1 label (if any) is first; drop it.
return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr

{{endfor}}
209 changes: 209 additions & 0 deletions pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#ifndef KHASH_HPP
#define KHASH_HPP

#include <cstdlib> // for malloc() etc
#include <cstring> // for memset()
#include <functional>
#include <memory>

#include <stdint.h> // for uint32_t

namespace klib {

#ifndef kroundup32 // FIXME: doesn't work for 64-bit integers
// Rounds x up, in place, to the next power of two (32-bit only).
#define kroundup32(x)                                                          \
  (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, (x) |= (x) >> 8,  \
   (x) |= (x) >> 16, ++(x))
#endif

// Bucket state is packed two bits per bucket, sixteen buckets per uint32_t
// word: bit value 2 means "empty", bit value 1 means "deleted", and both bits
// clear means the bucket holds a live key.
#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2)
#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1)
#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3)
#define __ac_set_isdel_false(flag, i)                                          \
  (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
#define __ac_set_isempty_false(flag, i)                                        \
  (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
#define __ac_set_isboth_false(flag, i)                                         \
  (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
#define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1))

// Number of uint32_t flag words needed for m buckets.
#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4)

// Open-addressing hash set of keys of type T.
//
// Buckets are addressed by an index of type khint_t; end() (== capacity()) is
// the "not found" sentinel returned by get(). Key storage is managed with
// malloc/realloc/free and keys are moved by plain assignment, so T is expected
// to be a trivially copyable type.
template <class T, class Hash, class Eq = std::equal_to<T>,
          typename khint_t = uint32_t>
class KHash {
  khint_t n_buckets, count, n_occupied, upper_bound;
  uint32_t *flags;
  T *keys;

public:
  KHash()
      : n_buckets(0), count(0), n_occupied(0), upper_bound(0), flags(NULL),
        keys(NULL) {}
  ~KHash() {
    std::free(flags);
    std::free(keys);
  }

  // The table owns raw buffers that the destructor frees; a member-wise copy
  // would free them twice, so copying is disabled.
  KHash(const KHash &) = delete;
  KHash &operator=(const KHash &) = delete;

  khint_t capacity(void) const { return n_buckets; }
  khint_t size(void) const { return count; }
  khint_t begin(void) const { return 0; }
  khint_t end(void) const { return n_buckets; }

  // True when bucket x holds a live key (neither empty nor deleted).
  bool exist(khint_t x) const { return !__ac_iseither(flags, x); }
  // Key stored in bucket x; only meaningful when exist(x) is true.
  T &at(khint_t x) { return keys[x]; }

  // Returns the bucket holding `key`, or end() when it is absent.
  khint_t get(const T &key) const {
    if (n_buckets) {
      khint_t k, i, last, mask, step = 0;
      mask = n_buckets - 1;
      k = Hash()(key);
      i = k & mask;
      last = i;
      while (!__ac_isempty(flags, i) &&
             (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
        i = (i + (++step)) & mask;
        if (i == last)
          return n_buckets;
      }
      return __ac_iseither(flags, i) ? n_buckets : i;
    } else
      return 0; // an empty table has end() == 0
  }

  // Rehashes into at least `new_n_buckets` buckets (rounded up to a power of
  // two, minimum 4). Returns 0 on success or when the request is too small
  // for the current contents, -1 when allocation fails (table unchanged).
  int resize(khint_t new_n_buckets) {
    uint32_t *new_flags = 0;
    khint_t j = 1;
    {
      kroundup32(new_n_buckets);
      if (new_n_buckets < 4)
        new_n_buckets = 4;
      if (count >= (new_n_buckets >> 1) + (new_n_buckets >> 2))
        j = 0; /* requested count is too small */
      else { /* hash table count to be changed (shrink or expand); rehash */
        new_flags = (uint32_t *)std::malloc(__ac_fsize(new_n_buckets) *
                                            sizeof(uint32_t));
        if (!new_flags)
          return -1;
        /* 0xaa sets the "empty" bit of every bucket */
        std::memset(new_flags, 0xaa,
                    __ac_fsize(new_n_buckets) * sizeof(uint32_t));
        if (n_buckets < new_n_buckets) { /* expand */
          T *new_keys =
              (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
          if (!new_keys) {
            std::free(new_flags);
            return -1;
          }
          keys = new_keys;
        } /* otherwise shrink */
      }
    }
    if (j) { /* rehashing is needed */
      for (j = 0; j != n_buckets; ++j) {
        if (__ac_iseither(flags, j) == 0) {
          T key = keys[j];
          khint_t new_mask;
          new_mask = new_n_buckets - 1;
          __ac_set_isdel_true(flags, j);
          while (1) { /* kick-out process; sort of like in Cuckoo hashing */
            khint_t k, i, step = 0;
            k = Hash()(key);
            i = k & new_mask;
            while (!__ac_isempty(new_flags, i))
              i = (i + (++step)) & new_mask;
            __ac_set_isempty_false(new_flags, i);
            if (i < n_buckets && __ac_iseither(flags, i) == 0) {
              /* kick out the existing element */
              {
                T tmp = keys[i];
                keys[i] = key;
                key = tmp;
              }
              /* mark it as deleted in the old hash table */
              __ac_set_isdel_true(flags, i);
            } else { /* write the element and jump out of the loop */
              keys[i] = key;
              break;
            }
          }
        }
      }
      if (n_buckets > new_n_buckets) { /* shrink the hash table */
        T *shrunk = (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
        /* a failed shrink leaves the old, larger buffer valid; keep using it
           rather than overwriting keys with NULL */
        if (shrunk)
          keys = shrunk;
      }
      std::free(flags); /* free the working space */
      flags = new_flags;
      n_buckets = new_n_buckets;
      n_occupied = count;
      upper_bound = (n_buckets >> 1) + (n_buckets >> 2);
    }
    return 0;
  }

  // Inserts `key` if absent and returns its bucket. `*ret` reports the
  // outcome: -1 allocation failure (end() is returned), 0 key already
  // present, 1 stored in a never-used bucket, 2 stored in a deleted bucket.
  khint_t put(const T &key, int *ret) {
    khint_t x;
    if (n_occupied >= upper_bound) { /* update the hash table */
      if (n_buckets > (count << 1)) {
        if (resize(n_buckets - 1) < 0) { /* clear "deleted" elements */
          *ret = -1;
          return n_buckets;
        }
      } else if (resize(n_buckets + 1) < 0) { /* expand the hash table */
        *ret = -1;
        return n_buckets;
      }
    } /* TODO: to implement automatically shrinking; resize() already support
         shrinking */
    {
      khint_t k, i, site, last, mask = n_buckets - 1, step = 0;
      x = site = n_buckets;
      k = Hash()(key);
      i = k & mask;
      if (__ac_isempty(flags, i))
        x = i; /* for speed up */
      else {
        last = i;
        while (!__ac_isempty(flags, i) &&
               (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
          if (__ac_isdel(flags, i))
            site = i;
          i = (i + (++step)) & mask;
          if (i == last) {
            x = site;
            break;
          }
        }
        if (x == n_buckets) {
          if (__ac_isempty(flags, i) && site != n_buckets)
            x = site;
          else
            x = i;
        }
      }
    }
    if (__ac_isempty(flags, x)) { /* not present at all */
      keys[x] = key;
      __ac_set_isboth_false(flags, x);
      ++count;
      ++n_occupied;
      *ret = 1;
    } else if (__ac_isdel(flags, x)) { /* deleted */
      keys[x] = key;
      __ac_set_isboth_false(flags, x);
      ++count;
      *ret = 2;
    } else
      *ret = 0; /* Don't touch keys[x] if present and not deleted */
    return x;
  }

  // Marks bucket x as deleted; a no-op for end() or a non-live bucket.
  void del(khint_t x) {
    if (x != n_buckets && !__ac_iseither(flags, x)) {
      __ac_set_isdel_true(flags, x);
      --count;
    }
  }
};

} // end of namespace klib

#endif
13 changes: 13 additions & 0 deletions pandas/_libs/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,19 @@ foreach ext_name, ext_dict : libs_sources
)
endforeach

# hashtable_cpp is an exception because it requires cpp compiler
# It is declared outside the libs_sources loop above so that cython_language
# can be overridden to cpp for this one module.
# NOTE(review): '-ffunction-sections' and '-Wl,--gc-sections' are GCC/GNU-ld
# style flags; confirm they are accepted by every toolchain the project builds
# with (e.g. MSVC, Apple ld) or guard them with a compiler check.
py.extension_module(
'hashtable_cpp',
['hashtable_cpp.pyx'],
cpp_args: '-ffunction-sections',
link_args: '-Wl,--gc-sections',
cython_args: cython_args,
include_directories: [inc_np, inc_pd],
subdir: 'pandas/_libs',
override_options : ['cython_language=cpp'],
install: true
)

# Basically just __init__.py and the .pyi files
sources_to_install = [
'__init__.py',
Expand Down
Loading
Loading