From cdcbf36aebb0e5d54072bd96a6fe4932f1fd6ae0 Mon Sep 17 00:00:00 2001 From: Matthew Barnett Date: Sun, 24 Dec 2023 00:13:19 +0000 Subject: [PATCH] The escape function no longer escapes \x00. It's not necessary. Inline flags can now be turned off and apply to what follows. Added \R to match line endings. --- .github/workflows/main.yml | 10 +++++- changelog.txt | 8 +++++ regex_3/_regex_core.py | 25 ++++++--------- regex_3/regex.py | 6 +--- regex_3/test_regex.py | 59 +++++++++++++++--------------------- setup.py | 2 +- tools/build_regex_unicode.py | 2 +- 7 files changed, 54 insertions(+), 58 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e85bbd3..ad92bfa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,7 +45,7 @@ jobs: env: # macOS archs - CIBW_ARCHS_MACOS: "x86_64 arm64" + CIBW_ARCHS_MACOS: "x86_64 arm64 universal2" steps: - uses: actions/checkout@v3 @@ -64,6 +64,14 @@ jobs: name: regex-files path: wheelhouse/*.whl + - name: Create GitHub release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }} + with: + tag_name: ${{ github.ref }} + title: Release ${{ github.ref }} + # Build source distribution & manylinux1_x86_64 wheels # These two jobs build: # 1, build_wheels (above): manylinux1_i686 / manylinux2014_x86_64 diff --git a/changelog.txt b/changelog.txt index b0571e5..67e11b2 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,11 @@ +Version: 2023.12.23 + + The escape function no longer escapes \x00. It's not necessary. + + Inline flags can now be turned off and apply to what follows. + + Added \R to match line endings. + Version: 2023.10.3 Updated to Unicode 15.1.0. diff --git a/regex_3/_regex_core.py b/regex_3/_regex_core.py index 1db2855..c2314f8 100644 --- a/regex_3/_regex_core.py +++ b/regex_3/_regex_core.py @@ -1150,22 +1150,7 @@ def parse_flags_subpattern(source, info): def parse_positional_flags(source, info, flags_on, flags_off): "Parses positional flags." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version == VERSION0: - # Positional flags are global and can only be turned on. - if flags_off: - raise error("bad inline flags: cannot turn flags off", - source.string, source.pos) - - new_global_flags = flags_on & ~info.global_flags - if new_global_flags: - info.global_flags |= new_global_flags - - # A global has been turned on, so reparse the pattern. - raise _UnscopedFlagSet(info.global_flags) - else: - info.flags = (info.flags | flags_on) & ~flags_off - + info.flags = (info.flags | flags_on) & ~flags_off source.ignore_space = bool(info.flags & VERBOSE) def parse_name(source, allow_numeric=False, allow_group_0=False): @@ -1233,6 +1218,14 @@ def parse_escape(source, info, in_set): elif ch in "pP": # A Unicode property, positive or negative. return parse_property(source, info, ch == "p", in_set) + elif ch == "R" and not in_set: + # A line ending. + charset = [0x0A, 0x0B, 0x0C, 0x0D] + if info.guess_encoding == UNICODE: + charset.extend([0x85, 0x2028, 0x2029]) + + return Atomic(Branch([String([0x0D, 0x0A]), SetUnion(info, [Character(c) + for c in charset])])) elif ch == "X" and not in_set: # A grapheme cluster. return Grapheme() diff --git a/regex_3/regex.py b/regex_3/regex.py index 79a199f..53b356a 100644 --- a/regex_3/regex.py +++ b/regex_3/regex.py @@ -241,7 +241,7 @@ "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__", "__doc__", "RegexFlag"] -__version__ = "2.5.135" +__version__ = "2.5.136" # -------------------------------------------------------------------- # Public interface. @@ -392,8 +392,6 @@ def escape(pattern, special_only=True, literal_spaces=False): elif c in _METACHARS or c.isspace(): s.append("\\") s.append(c) - elif c == "\x00": - s.append("\\000") else: s.append(c) else: @@ -402,8 +400,6 @@ def escape(pattern, special_only=True, literal_spaces=False): s.append(c) elif c in _ALNUM: s.append(c) - elif c == "\x00": - s.append("\\000") else: s.append("\\") s.append(c) diff --git a/regex_3/test_regex.py b/regex_3/test_regex.py index 1128515..21cdb8a 100644 --- a/regex_3/test_regex.py +++ b/regex_3/test_regex.py @@ -911,10 +911,9 @@ def test_inline_flags(self): p = regex.compile('(?iu)' + lower_char) self.assertEqual(bool(p.match(upper_char)), True) + # Changed to positional flags in regex 2023.12.23. self.assertEqual(bool(regex.match(r"(?i)a", "A")), True) - self.assertEqual(bool(regex.match(r"a(?i)", "A")), True) - self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True) - self.assertEqual(regex.match(r"a(?iV1)", "A"), None) + self.assertEqual(regex.match(r"a(?i)", "A"), None) def test_dollar_matches_twice(self): # $ matches the end of string, and just before the terminating \n. @@ -1396,18 +1395,15 @@ def test_scoped_and_inline_flags(self): # Issues 433028, 433024, 433027. self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2)) self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None) - - self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda: - regex.search(r"(?V0-i)Ab", "ab", flags=regex.I)) + # Changed to positional flags in regex 2023.12.23. + self.assertEqual(regex.search(r"A(?i)b", "ab"), None) self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None) self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None) - self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None) + self.assertEqual(regex.search(r"(?-i)Ab", "ab", flags=regex.I), None) self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None) - self.assertEqual(regex.search(r"A(?V1-i)b", "ab", - flags=regex.I).span(), (0, 2)) + self.assertEqual(regex.search(r"A(?-i)b", "ab", flags=regex.I).span(), + (0, 2)) def test_repeated_repeats(self): # Issue 2537. @@ -1820,12 +1816,10 @@ def test_various(self): ('a.*b', 'acc\nccb', '', ascii(None)), ('a.{4,5}b', 'acc\nccb', '', ascii(None)), ('a.b', 'a\rb', '0', ascii('a\rb')), - # The new behaviour is that the inline flag affects only what follows. - ('a.b(?s)', 'a\nb', '0', ascii('a\nb')), - ('a.b(?sV1)', 'a\nb', '', ascii(None)), + # Changed to positional flags in regex 2023.12.23. + ('a.b(?s)', 'a\nb', '', ascii(None)), ('(?s)a.b', 'a\nb', '0', ascii('a\nb')), - ('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')), - ('a.*(?sV1)b', 'acc\nccb', '', ascii(None)), + ('a.*(?s)b', 'acc\nccb', '', ascii(None)), ('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')), ('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')), @@ -2345,12 +2339,9 @@ def test_various(self): # Not an error under PCRE/PRE: # When the new behaviour is turned on positional inline flags affect # only what follows. - ('w(?i)', 'W', '0', ascii('W')), - ('w(?iV1)', 'W', '0', ascii(None)), + ('w(?i)', 'W', '0', ascii(None)), ('w(?i)', 'w', '0', ascii('w')), - ('w(?iV1)', 'w', '0', ascii('w')), ('(?i)w', 'W', '0', ascii('W')), - ('(?iV1)w', 'W', '0', ascii('W')), # Comments using the x embedded pattern modifier. ("""(?x)w# comment 1 @@ -2403,14 +2394,10 @@ def test_various(self): # Bug 114033: nothing to repeat. (r'(x?)?', 'x', '0', ascii('x')), # Bug 115040: rescan if flags are modified inside pattern. - # If the new behaviour is turned on then positional inline flags - # affect only what follows. - (r' (?x)foo ', 'foo', '0', ascii('foo')), - (r' (?V1x)foo ', 'foo', '0', ascii(None)), + # Changed to positional flags in regex 2023.12.23. + (r' (?x)foo ', 'foo', '0', ascii(None)), (r'(?x) foo ', 'foo', '0', ascii('foo')), - (r'(?V1x) foo ', 'foo', '0', ascii('foo')), (r'(?x)foo ', 'foo', '0', ascii('foo')), - (r'(?V1x)foo ', 'foo', '0', ascii('foo')), # Bug 115618: negative lookahead. (r'(?