Skip to content

Commit

Permalink
Use Abseil's character class functions.
Browse files Browse the repository at this point in the history
They are locale-agnostic, whereas the libc equivalents are not
and thus incur indirections due to checking the current locale.

Change-Id: I5e1350ff33c3090c345b4166afb6b80d92d4a0df
Reviewed-on: https://code-review.googlesource.com/c/re2/+/61890
Reviewed-by: Alex Chernyakhovsky <[email protected]>
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Sep 14, 2023
1 parent a807e8a commit 09de536
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
2 changes: 1 addition & 1 deletion re2/fuzzing/re2_fuzzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void TestOneInput(absl::string_view pattern, const RE2::Options& options,
// counted repetition is involved - whereas the marginal benefit is zero.
// Crudely limit the use of 'k', 'K', 's' and 'S' too because they become
// three-element character classes when case-insensitive and using UTF-8.
// TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
// TODO(junyer): Handle [[:alnum:]] et al. when they start to cause pain.
int char_class = 0;
int backslash_p = 0; // very expensive, so handle specially
for (size_t i = 0; i < pattern.size(); i++) {
Expand Down
9 changes: 5 additions & 4 deletions re2/parse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <vector>

#include "absl/base/macros.h"
#include "absl/strings/ascii.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/pod_array.h"
Expand Down Expand Up @@ -1322,14 +1323,14 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
// Parses a decimal integer, storing it in *np.
// Sets *s to span the remainder of the string.
static bool ParseInteger(absl::string_view* s, int* np) {
if (s->empty() || !isdigit((*s)[0] & 0xFF))
if (s->empty() || !absl::ascii_isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
if (s->size() >= 2 && (*s)[0] == '0' && isdigit((*s)[1] & 0xFF))
if (s->size() >= 2 && (*s)[0] == '0' && absl::ascii_isdigit((*s)[1] & 0xFF))
return false;
int n = 0;
int c;
while (!s->empty() && isdigit(c = (*s)[0] & 0xFF)) {
while (!s->empty() && absl::ascii_isdigit(c = (*s)[0] & 0xFF)) {
// Avoid overflow.
if (n >= 100000000)
return false;
Expand Down Expand Up @@ -1468,7 +1469,7 @@ static bool ParseEscape(absl::string_view* s, Rune* rp,
int code;
switch (c) {
default:
if (c < Runeself && !isalpha(c) && !isdigit(c)) {
if (c < Runeself && !absl::ascii_isalnum(c)) {
// Escaped non-word characters are always themselves.
// PCRE is not quite so rigorous: it accepts things like
// \q, but we don't. We once rejected \_, but too many
Expand Down
11 changes: 6 additions & 5 deletions re2/re2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "absl/base/macros.h"
#include "absl/container/fixed_array.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/strutil.h"
Expand Down Expand Up @@ -975,7 +976,7 @@ bool RE2::CheckRewriteString(absl::string_view rewrite,
if (c == '\\') {
continue;
}
if (!isdigit(c)) {
if (!absl::ascii_isdigit(c)) {
*error = "Rewrite schema error: "
"'\\' must be followed by a digit or '\\'.";
return false;
Expand Down Expand Up @@ -1005,7 +1006,7 @@ int RE2::MaxSubmatch(absl::string_view rewrite) {
if (*s == '\\') {
s++;
int c = (s < end) ? *s : -1;
if (isdigit(c)) {
if (absl::ascii_isdigit(c)) {
int n = (c - '0');
if (n > max)
max = n;
Expand All @@ -1029,7 +1030,7 @@ bool RE2::Rewrite(std::string* out,
}
s++;
int c = (s < end) ? *s : -1;
if (isdigit(c)) {
if (absl::ascii_isdigit(c)) {
int n = (c - '0');
if (n >= veclen) {
if (options_.log_errors()) {
Expand Down Expand Up @@ -1110,13 +1111,13 @@ static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
size_t* np, bool accept_spaces) {
size_t n = *np;
if (n == 0) return "";
if (n > 0 && isspace(*str)) {
if (n > 0 && absl::ascii_isspace(*str)) {
// We are less forgiving than the strtoxxx() routines and do not
// allow leading spaces. We do allow leading spaces for floats.
if (!accept_spaces) {
return "";
}
while (n > 0 && isspace(*str)) {
while (n > 0 && absl::ascii_isspace(*str)) {
n--;
str++;
}
Expand Down

0 comments on commit 09de536

Please sign in to comment.