-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
POC: Move some Tempita to Cython/C++ #56432
Closed
Closed
Changes from 5 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
192a242
move non-tempita code out of tempita
WillAyd 37e6410
working compilation and passing tests
WillAyd 650e336
Revert "move non-tempita code out of tempita"
WillAyd 6756eb4
remove errant file
WillAyd 6c871fc
comment
WillAyd f33e23a
add khash dep
WillAyd 1945abd
add cython args
WillAyd a04808f
remove cimport form pxd
WillAyd d1cc45b
verbose meson
WillAyd 7ef6499
different verbose
WillAyd e26ae5f
Merge remote-tracking branch 'upstream/main' into cpp-templating
WillAyd 66de39c
more build changes
WillAyd 074a30d
Merge branch 'main' into cpp-templating
WillAyd 8709644
Add -ffunction-sections argument
WillAyd 1f97b01
Add fvisibility=hidden
WillAyd 1e7d9e8
gc-sections linker arg
WillAyd File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import cython | ||
import numpy as np | ||
|
||
cimport numpy as cnp | ||
from libc.stdint cimport uint32_t | ||
from libc.string cimport memcpy | ||
from libcpp.vector cimport vector | ||
|
||
from pandas._libs.khash cimport kh_needed_n_buckets | ||
|
||
|
||
cdef extern from "<functional>" namespace "std" nogil:
    # Minimal declaration of std::hash, used here only as the ``Hash``
    # template argument of KHash.
    cdef cppclass hash[T]:
        hash()
        # The call operator needs an explicit argument list; a bare
        # ``size_t operator()`` is parsed by Cython as a data member named
        # "operator()" rather than as a callable.
        size_t operator()(const T&)
|
||
cdef extern from "pandas/vendored/klib/cpp/khash.hpp" namespace "klib" nogil:
    # Cython view of the templated klib::KHash hash set.  ``Eq`` and
    # ``khint_t`` are optional because the C++ template supplies defaults for
    # them.
    cdef cppclass KHash[T, Hash, Eq=*, khint_t=*]:
        # NOTE(review): ``keys`` is declared before ``public:`` in the C++
        # class, so touching it from Cython would presumably fail to compile
        # -- confirm before relying on it.
        T *keys
        KHash()
        # TODO: validate we don't need destructor
        # ~KHash()
        # exist() reports whether bucket ``x`` holds a live key, so it is
        # declared as returning a boolean rather than void.
        bint exist(khint_t x)
        T &at(khint_t x)
        khint_t get(const T &)
        # TODO: make this khint_t
        # int resize(khint_t)
        # Returns 0 on success and -1 when an allocation fails.
        int resize(uint32_t)
        # Writes the insertion status through the ``int *`` argument:
        # 0 = key already present, 1 / 2 = newly stored, -1 = resize failed.
        khint_t put(const T &, int *)
        # void del(khint_t x)
|
||
|
||
# TODO: de-duplicate from hashtable.pyx
# Cap applied to the bucket-count hint that unique_label_indices passes to
# KHash.resize() before inserting any labels.
cdef uint32_t SIZE_HINT_LIMIT = (1 << 20) + 7 | ||
|
||
|
||
@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(const cnp.npy_intp[:] labels) -> cnp.ndarray:
    """
    Indices of the first occurrences of the unique labels
    *excluding* -1. equivalent to:
        np.unique(labels, return_index=True)[1]

    Parameters
    ----------
    labels : 1-D memoryview of intp
        Label codes to scan.

    Returns
    -------
    np.ndarray[intp]
        Positions of the first occurrence of each distinct label, ordered
        by label value, with the position of the -1 label (if any) dropped.

    Raises
    ------
    MemoryError
        If the hash table cannot allocate its buckets.
    """
    cdef:
        int ret = 0
        bint failed = False
        Py_ssize_t i, n = len(labels)
        KHash[cnp.npy_intp, hash[cnp.npy_intp]] *table = (
            new KHash[cnp.npy_intp, hash[cnp.npy_intp]]()
        )
        cnp.ndarray[cnp.npy_intp, ndim=1] arr
        vector[cnp.npy_intp] idx = vector[cnp.npy_intp]()

    # The table is heap-allocated with ``new``; release it on every exit path
    # (including exceptions) so repeated calls do not leak its buckets.
    try:
        # resize() reports allocation failure with a negative return value.
        if table.resize(min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) < 0:
            raise MemoryError()

        with nogil:
            for i in range(n):
                table.put(labels[i], &ret)
                if ret < 0:
                    # put() could not grow the table; the key was not stored.
                    # Remember the failure and raise once the GIL is held.
                    failed = True
                    break
                if ret != 0:
                    # Non-zero status means the label was not seen before.
                    # TODO: pandas has a custom resize operation but we
                    # rely on C++ stdlib here - how different are they?
                    idx.push_back(i)
    finally:
        del table

    if failed:
        raise MemoryError()

    # TODO: must be a cleaner way to do this?
    # even arr.data = move(idx.data()) would be better but arr.data is readonly
    arr = np.empty(idx.size(), dtype=np.intp)
    if idx.size() > 0:
        # Skip the copy for an empty result: the vector's data pointer may be
        # null in that case, which memcpy does not accept even for 0 bytes.
        memcpy(arr.data, idx.const_data(), idx.size() * sizeof(cnp.npy_intp))
    # Order the first-occurrence positions by the label value they point at.
    arr = arr[np.asarray(labels)[arr].argsort()]

    # After sorting, a -1 label (if present) is first; drop it.
    return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
209 changes: 209 additions & 0 deletions
209
pandas/_libs/include/pandas/vendored/klib/cpp/khash.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
#ifndef KHASH_HPP | ||
#define KHASH_HPP | ||
|
||
#include <cstdlib> // for malloc() etc | ||
#include <cstring> // for memset() | ||
#include <functional> | ||
#include <memory> | ||
|
||
#include <stdint.h> // for uint32_t | ||
|
||
namespace klib { | ||
|
||
#ifndef kroundup32 // FIXME: doesn't work for 64-bit integers
/* Round the lvalue x up, in place, to the next power of two by smearing its
   highest set bit into every lower position and then adding one. */
#define kroundup32(x)                                                          \
  (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, (x) |= (x) >> 8,  \
   (x) |= (x) >> 16, ++(x))
#endif

/* Bucket status flags: every bucket i owns two bits inside a uint32_t word,
   so one word covers 16 buckets (word index i >> 4, bit offset (i & 0xf) * 2).
   Value 2 in the pair marks the bucket as empty, value 1 marks it as deleted.
   All macro parameters are parenthesised so that expression arguments expand
   with the intended precedence. */
#define __ac_isempty(flag, i) (((flag)[(i) >> 4] >> (((i) & 0xfU) << 1)) & 2)
#define __ac_isdel(flag, i) (((flag)[(i) >> 4] >> (((i) & 0xfU) << 1)) & 1)
#define __ac_iseither(flag, i) (((flag)[(i) >> 4] >> (((i) & 0xfU) << 1)) & 3)
#define __ac_set_isdel_false(flag, i)                                          \
  ((flag)[(i) >> 4] &= ~(1ul << (((i) & 0xfU) << 1)))
#define __ac_set_isempty_false(flag, i)                                        \
  ((flag)[(i) >> 4] &= ~(2ul << (((i) & 0xfU) << 1)))
#define __ac_set_isboth_false(flag, i)                                         \
  ((flag)[(i) >> 4] &= ~(3ul << (((i) & 0xfU) << 1)))
#define __ac_set_isdel_true(flag, i)                                           \
  ((flag)[(i) >> 4] |= 1ul << (((i) & 0xfU) << 1))

/* Number of uint32_t flag words needed for m buckets (at least one). */
#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4)
|
||
/*
 * Open-addressing hash set of keys of type T.
 *
 * Template parameters:
 *   T        key type; keys are stored in a malloc/realloc'd array and moved
 *            by plain assignment, so T is expected to be a simple value type.
 *   Hash     functor returning the hash of a key.
 *   Eq       functor comparing two keys for equality.
 *   khint_t  unsigned integer type used for bucket indices and counts.
 *
 * A bucket index equal to end() (== capacity()) means "not found".
 */
template <class T, class Hash, class Eq = std::equal_to<T>,
          typename khint_t = uint32_t>
class KHash {
  /* n_buckets: table capacity; count: live keys; n_occupied: live + deleted
     buckets; upper_bound: occupancy at which put() triggers a resize. */
  khint_t n_buckets, count, n_occupied, upper_bound;
  uint32_t *flags; /* two status bits (empty / deleted) per bucket */
  T *keys;

public:
  KHash()
      : n_buckets(0), count(0), n_occupied(0), upper_bound(0), flags(NULL),
        keys(NULL) {}
  ~KHash() {
    std::free(flags);
    std::free(keys);
  }
  /* The table owns raw buffers that the destructor frees; a member-wise copy
     would free them twice, so copying is disabled. */
  KHash(const KHash &) = delete;
  KHash &operator=(const KHash &) = delete;

  khint_t capacity(void) const { return n_buckets; }
  khint_t size(void) const { return count; }
  khint_t begin(void) const { return 0; }
  khint_t end(void) const { return n_buckets; }

  /* True when bucket x holds a live key (neither empty nor deleted). */
  bool exist(khint_t x) const { return !__ac_iseither(flags, x); }
  /* Reference to the key stored in bucket x; x is not validated. */
  T &at(khint_t x) { return keys[x]; }

  /* Look up key. Returns its bucket index, or end() when it is absent (for a
     table with no buckets this is 0, which also equals end()). */
  khint_t get(const T &key) const {
    if (n_buckets) {
      khint_t k, i, last, mask, step = 0;
      mask = n_buckets - 1;
      k = Hash()(key);
      i = k & mask;
      last = i;
      /* probe until an empty bucket or a live bucket holding key is found */
      while (!__ac_isempty(flags, i) &&
             (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
        i = (i + (++step)) & mask;
        if (i == last)
          return n_buckets; /* wrapped around: not present */
      }
      return __ac_iseither(flags, i) ? n_buckets : i;
    } else
      return 0;
  }

  /* Rehash into a table of at least new_n_buckets buckets (rounded up to a
     power of two, minimum 4). Does nothing when the live keys would not fit
     under the 3/4 load limit. Returns 0 on success or no-op, -1 when an
     allocation fails (the table is left unchanged in that case). */
  int resize(khint_t new_n_buckets) {
    uint32_t *new_flags = 0;
    khint_t j = 1;
    {
      kroundup32(new_n_buckets);
      if (new_n_buckets < 4)
        new_n_buckets = 4;
      if (count >= (new_n_buckets >> 1) + (new_n_buckets >> 2))
        j = 0; /* requested count is too small */
      else { /* hash table count to be changed (shrink or expand); rehash */
        new_flags = (uint32_t *)std::malloc(__ac_fsize(new_n_buckets) *
                                            sizeof(uint32_t));
        if (!new_flags)
          return -1;
        /* 0xaa sets the "empty" bit of every two-bit bucket entry */
        std::memset(new_flags, 0xaa,
                    __ac_fsize(new_n_buckets) * sizeof(uint32_t));
        if (n_buckets < new_n_buckets) { /* expand */
          T *new_keys =
              (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
          if (!new_keys) {
            std::free(new_flags);
            return -1;
          }
          keys = new_keys;
        } /* otherwise shrink */
      }
    }
    if (j) { /* rehashing is needed */
      for (j = 0; j != n_buckets; ++j) {
        if (__ac_iseither(flags, j) == 0) {
          T key = keys[j];
          khint_t new_mask;
          new_mask = new_n_buckets - 1;
          __ac_set_isdel_true(flags, j);
          while (1) { /* kick-out process; sort of like in Cuckoo hashing */
            khint_t k, i, step = 0;
            k = Hash()(key);
            i = k & new_mask;
            while (!__ac_isempty(new_flags, i))
              i = (i + (++step)) & new_mask;
            __ac_set_isempty_false(new_flags, i);
            if (i < n_buckets && __ac_iseither(flags, i) == 0) {
              /* kick out the existing element */
              {
                T tmp = keys[i];
                keys[i] = key;
                key = tmp;
              }
              /* mark it as deleted in the old hash table */
              __ac_set_isdel_true(flags, i);
            } else { /* write the element and jump out of the loop */
              keys[i] = key;
              break;
            }
          }
        }
      }
      if (n_buckets > new_n_buckets) { /* shrink the hash table */
        T *shrunk = (T *)std::realloc((void *)keys, new_n_buckets * sizeof(T));
        /* If realloc fails the old, larger block is still valid and already
           holds every live key in its first new_n_buckets slots, so keep
           using it instead of overwriting keys with NULL. */
        if (shrunk)
          keys = shrunk;
      }
      std::free(flags); /* free the working space */
      flags = new_flags;
      n_buckets = new_n_buckets;
      n_occupied = count;
      upper_bound = (n_buckets >> 1) + (n_buckets >> 2);
    }
    return 0;
  }

  /* Insert key if it is absent. Returns the bucket index of the key and
     stores a status in *ret: 0 = already present, 1 = stored in a
     never-used bucket, 2 = stored in a previously deleted bucket,
     -1 = the table could not be resized (end() is returned). */
  khint_t put(const T &key, int *ret) {
    khint_t x;
    if (n_occupied >= upper_bound) { /* update the hash table */
      if (n_buckets > (count << 1)) {
        if (resize(n_buckets - 1) < 0) { /* clear "deleted" elements */
          *ret = -1;
          return n_buckets;
        }
      } else if (resize(n_buckets + 1) < 0) { /* expand the hash table */
        *ret = -1;
        return n_buckets;
      }
    } /* TODO: to implement automatically shrinking; resize() already support
         shrinking */
    {
      khint_t k, i, site, last, mask = n_buckets - 1, step = 0;
      x = site = n_buckets;
      k = Hash()(key);
      i = k & mask;
      if (__ac_isempty(flags, i))
        x = i; /* for speed up */
      else {
        last = i;
        while (!__ac_isempty(flags, i) &&
               (__ac_isdel(flags, i) || !Eq()(keys[i], key))) {
          if (__ac_isdel(flags, i))
            site = i; /* remember a deleted bucket that can be reused */
          i = (i + (++step)) & mask;
          if (i == last) {
            x = site;
            break;
          }
        }
        if (x == n_buckets) {
          if (__ac_isempty(flags, i) && site != n_buckets)
            x = site;
          else
            x = i;
        }
      }
    }
    if (__ac_isempty(flags, x)) { /* not present at all */
      keys[x] = key;
      __ac_set_isboth_false(flags, x);
      ++count;
      ++n_occupied;
      *ret = 1;
    } else if (__ac_isdel(flags, x)) { /* deleted */
      keys[x] = key;
      __ac_set_isboth_false(flags, x);
      ++count;
      *ret = 2;
    } else
      *ret = 0; /* Don't touch keys[x] if present and not deleted */
    return x;
  }

  /* Mark bucket x as deleted; ignored when x is end() or holds no live key. */
  void del(khint_t x) {
    if (x != n_buckets && !__ac_iseither(flags, x)) {
      __ac_set_isdel_true(flags, x);
      --count;
    }
  }
};
|
||
} // end of namespace klib | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `data` member in Cython is read-only, and I wasn't sure of any ndarray constructor that would properly manage the lifecycle of a raw data buffer. cc @jbrockmendel in case you know of a better way to do this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What would be the advantage of using a vector here over just putting things into the ndarray and resizing it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i think @seberg is the person to ask about this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lithomas1 maybe you could do without it, although this was trying to stay a faithful port of the current codebase, which creates a custom templated Vector class.
std::vector also has the advantage of working out of the box and using RAII; if you were to do this with a raw buffer, it takes a few more steps and requires manual memory management, along with diving into ndarray internals.