diff --git a/hv.c b/hv.c index 1703084d7dd6..30eafc274c72 100644 --- a/hv.c +++ b/hv.c @@ -3686,39 +3686,11 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, (UV)flags); if (!chain) goto ret; - if (flags & REFCOUNTED_HE_KEY_UTF8) { - /* For searching purposes, canonicalise to Latin-1 where possible. */ - const char *keyend = keypv + keylen, *p; - STRLEN nonascii_count = 0; - for (p = keypv; p != keyend; p++) { - if (! UTF8_IS_INVARIANT(*p)) { - if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { - goto canonicalised_key; - } - nonascii_count++; - p++; - } - } - if (nonascii_count) { - char *q; - const char *p = keypv, *keyend = keypv + keylen; - keylen -= nonascii_count; - Newx(q, keylen, char); - SAVEFREEPV(q); - keypv = q; - for (; p != keyend; p++, q++) { - U8 c = (U8)*p; - if (UTF8_IS_INVARIANT(c)) { - *q = (char) c; - } - else { - p++; - *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p); - } - } - } + /* For searching purposes, canonicalise to Latin-1 where possible. */ + if ( flags & REFCOUNTED_HE_KEY_UTF8 + && utf8_to_bytes_temp_pv(&keypv, &keylen)) + { flags &= ~REFCOUNTED_HE_KEY_UTF8; - canonicalised_key: ; } utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0; if (!hash) @@ -3861,39 +3833,11 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, } hekflags = value_type; - if (flags & REFCOUNTED_HE_KEY_UTF8) { - /* Canonicalise to Latin-1 where possible. */ - const char *keyend = keypv + keylen, *p; - STRLEN nonascii_count = 0; - for (p = keypv; p != keyend; p++) { - if (! UTF8_IS_INVARIANT(*p)) { - if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { - goto canonicalised_key; - } - nonascii_count++; - p++; - } - } - if (nonascii_count) { - char *q; - const char *p = keypv, *keyend = keypv + keylen; - keylen -= nonascii_count; - Newx(q, keylen, char); - SAVEFREEPV(q); - keypv = q; - for (; p != keyend; p++, q++) { - U8 c = (U8)*p; - if (UTF8_IS_INVARIANT(c)) { - *q = (char) c; - } - else { - p++; - *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p); - } - } - } + /* Canonicalise to Latin-1 where possible. */ + if ( (flags & REFCOUNTED_HE_KEY_UTF8) + && utf8_to_bytes_temp_pv(&keypv, &keylen)) + { flags &= ~REFCOUNTED_HE_KEY_UTF8; - canonicalised_key: ; } if (flags & REFCOUNTED_HE_KEY_UTF8) hekflags |= HVhek_UTF8;