Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refcounted_he_(new|fetch)_pvn: Don't roll-own code #22808

Merged
merged 2 commits into from
Dec 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 60 additions & 108 deletions hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* [p.278 of _The Lord of the Rings_, II/iii: "The Ring Goes South"]
*/

/*
/*
=head1 HV Handling
A HV structure represents a Perl hash. It consists mainly of an array
of pointers, each of which points to a linked list of HE structures. The
Expand Down Expand Up @@ -513,7 +513,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
((flags & HVhek_UTF8)
? SVf_UTF8 : 0));
}

mg->mg_obj = keysv; /* pass key */
uf->uf_index = action; /* pass action */
magic_getuvar(MUTABLE_SV(hv), mg);
Expand Down Expand Up @@ -763,24 +763,25 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
}

if (is_utf8 && !(flags & HVhek_KEYCANONICAL)) {
char * const keysave = (char *)key;
key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
if (is_utf8)
flags |= HVhek_UTF8;
else
flags &= ~HVhek_UTF8;
if (key != keysave) {
if (flags & HVhek_FREEKEY)
Safefree(keysave);
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
/* If the caller calculated a hash, it was on the sequence of
octets that are the UTF-8 form. We've now changed the sequence
of octets stored to that of the equivalent byte representation,
so the hash we need is different. */
hash = 0;
}
char * const keysave = (char *)key;
key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
if (is_utf8)
flags |= HVhek_UTF8;
else
flags &= ~HVhek_UTF8;
if (key != keysave) {
if (flags & HVhek_FREEKEY)
Safefree(keysave);
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
/* If the caller calculated a hash, it was on the sequence of
* octets that are the UTF-8 form. We've now changed the
* sequence of octets stored to that of the equivalent byte
* representation, so the hash we need is different. */
hash = 0;
}
}


if (keysv && (SvIsCOW_shared_hash(keysv))) {
if (HvSHAREKEYS(hv))
keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
Expand Down Expand Up @@ -912,7 +913,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,

not_found:
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
if (!(action & HV_FETCH_ISSTORE)
if (!(action & HV_FETCH_ISSTORE)
&& SvRMAGICAL((const SV *)hv)
&& mg_find((const SV *)hv, PERL_MAGIC_env)) {
unsigned long len;
Expand Down Expand Up @@ -1245,7 +1246,7 @@ Perl_hv_bucket_ratio(pTHX_ HV *hv)
}
else
sv = &PL_sv_zero;

return sv;
}

Expand Down Expand Up @@ -1458,7 +1459,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HvHASKFLAGS_off(hv);
}

/* If this is a stash and the key ends with ::, then someone is
/* If this is a stash and the key ends with ::, then someone is
* deleting a package.
*/
if (sv && SvTYPE(sv) == SVt_PVGV && HvHasENAME(hv)) {
Expand Down Expand Up @@ -2761,7 +2762,7 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
{
assert(*hekp);
if (
(HEK_UTF8(*hekp) || (flags & SVf_UTF8))
(HEK_UTF8(*hekp) || (flags & SVf_UTF8))
? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags)
: (HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len))
) {
Expand Down Expand Up @@ -2824,7 +2825,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
HEK **victim = namep + (count < 0 ? -count : count);
while (victim-- > namep + 1)
if (
(HEK_UTF8(*victim) || (flags & SVf_UTF8))
(HEK_UTF8(*victim) || (flags & SVf_UTF8))
? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags)
: (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len))
) {
Expand All @@ -2847,7 +2848,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
return;
}
if (
count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8))
count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8))
? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags)
: (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len))
)
Expand All @@ -2856,7 +2857,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
}
}
else if(
(HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8))
(HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8))
? hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags)
: (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len &&
memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len))
Expand Down Expand Up @@ -3343,22 +3344,22 @@ Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash)
PERL_ARGS_ASSERT_SHARE_HEK;

if (len < 0) {
STRLEN tmplen = -len;
is_utf8 = TRUE;
/* See the note in hv_fetch(). --jhi */
str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
len = tmplen;
/* If we were able to downgrade here, then than means that we were passed
in a key which only had chars 0-255, but was utf8 encoded. */
if (is_utf8)
flags = HVhek_UTF8;
/* If we found we were able to downgrade the string to bytes, then
we should flag that it needs upgrading on keys or each. Also flag
that we need share_hek_flags to free the string. */
if (str != save) {
PERL_HASH(hash, str, len);
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
STRLEN tmplen = -len;
is_utf8 = TRUE;
/* See the note in hv_fetch(). --jhi */
str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8);
len = tmplen;
/* If we were able to downgrade here, then than means that we were passed
in a key which only had chars 0-255, but was utf8 encoded. */
if (is_utf8)
flags = HVhek_UTF8;
/* If we found we were able to downgrade the string to bytes, then
we should flag that it needs upgrading on keys or each. Also flag
that we need share_hek_flags to free the string. */
if (str != save) {
PERL_HASH(hash, str, len);
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
}

return share_hek_flags (str, len, hash, flags);
Expand Down Expand Up @@ -3683,47 +3684,21 @@ SV *
Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
const char *keypv, STRLEN keylen, U32 hash, U32 flags)
{
U8 utf8_flag;
PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;

U8 utf8_flag;
U8 * free_me = NULL;

if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
(UV)flags);
if (!chain)
goto ret;
if (flags & REFCOUNTED_HE_KEY_UTF8) {
/* For searching purposes, canonicalise to Latin-1 where possible. */
const char *keyend = keypv + keylen, *p;
STRLEN nonascii_count = 0;
for (p = keypv; p != keyend; p++) {
if (! UTF8_IS_INVARIANT(*p)) {
if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
goto canonicalised_key;
}
nonascii_count++;
p++;
}
}
if (nonascii_count) {
char *q;
const char *p = keypv, *keyend = keypv + keylen;
keylen -= nonascii_count;
Newx(q, keylen, char);
SAVEFREEPV(q);
keypv = q;
for (; p != keyend; p++, q++) {
U8 c = (U8)*p;
if (UTF8_IS_INVARIANT(c)) {
*q = (char) c;
}
else {
p++;
*q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
}
}
}
/* For searching purposes, canonicalise to Latin-1 where possible. */
if ( flags & REFCOUNTED_HE_KEY_UTF8
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
canonicalised_key: ;
}
utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0;
if (!hash)
Expand All @@ -3743,6 +3718,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8)
#endif
) {
Safefree(free_me);
if (flags & REFCOUNTED_HE_EXISTS)
return (chain->refcounted_he_data[0] & HVrhek_typemask)
== HVrhek_delete
Expand All @@ -3751,6 +3727,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
}
}
ret:
Safefree(free_me);
return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder;
}

Expand Down Expand Up @@ -3835,14 +3812,16 @@ struct refcounted_he *
Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
const char *keypv, STRLEN keylen, U32 hash, SV *value, U32 flags)
{
PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;

STRLEN value_len = 0;
const char *value_p = NULL;
bool is_pv;
char value_type;
char hekflags;
STRLEN key_offset = 1;
struct refcounted_he *he;
PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;
U8 * free_me = NULL;

if (!value || value == &PL_sv_placeholder) {
value_type = HVrhek_delete;
Expand All @@ -3866,39 +3845,11 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
}
hekflags = value_type;

if (flags & REFCOUNTED_HE_KEY_UTF8) {
/* Canonicalise to Latin-1 where possible. */
const char *keyend = keypv + keylen, *p;
STRLEN nonascii_count = 0;
for (p = keypv; p != keyend; p++) {
if (! UTF8_IS_INVARIANT(*p)) {
if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
goto canonicalised_key;
}
nonascii_count++;
p++;
}
}
if (nonascii_count) {
char *q;
const char *p = keypv, *keyend = keypv + keylen;
keylen -= nonascii_count;
Newx(q, keylen, char);
SAVEFREEPV(q);
keypv = q;
for (; p != keyend; p++, q++) {
U8 c = (U8)*p;
if (UTF8_IS_INVARIANT(c)) {
*q = (char) c;
}
else {
p++;
*q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
}
}
}
/* Canonicalise to Latin-1 where possible. */
if ( (flags & REFCOUNTED_HE_KEY_UTF8)
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
canonicalised_key: ;
}
if (flags & REFCOUNTED_HE_KEY_UTF8)
hekflags |= HVhek_UTF8;
Expand Down Expand Up @@ -3938,6 +3889,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
he->refcounted_he_data[0] = hekflags;
he->refcounted_he_refcnt = 1;

Safefree(free_me);
return he;
}

Expand Down Expand Up @@ -4008,7 +3960,7 @@ Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) {
HINTS_REFCNT_LOCK;
new_count = --he->refcounted_he_refcnt;
HINTS_REFCNT_UNLOCK;

if (new_count) {
return;
}
Expand Down
Loading