Skip to content

Commit

Permalink
Applied requested changes
Browse files Browse the repository at this point in the history
- Document MaintainCaptureOrder option
- Use return in `assignNameSlots` and remove else
- Add test with MaintainCaptureOrder not provided
- Change the MaintainCaptureOrder value to `0x0400`
- Remove the `o` inline option
- Add comment to explain why `autocap` is consumed
  • Loading branch information
CIAvash committed May 13, 2021
1 parent 896189e commit 690cfad
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 67 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,20 @@ if isMatch, _ := re.MatchString(`Something to match`); isMatch {

This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).

## MaintainCaptureOrder mode
By default, `regexp2` matches the behavior of the .NET regex engine, which, unlike PCRE, does not keep capture groups in the order they appear in the pattern: unnamed groups are numbered first, and named capture groups are appended after them. Compiling a regexp with the `MaintainCaptureOrder` option numbers named and unnamed capture groups together, in the order they appear in the pattern.

```go
re := regexp2.MustCompile(`(?<first>This) (is) a (?<last>test)`, regexp2.MaintainCaptureOrder)
if match, _ := re.FindStringMatch(`This is a test`); match != nil {
// match.Groups()[1].String() == "This"
// match.Groups()[1].Name == "first"
// match.Groups()[2].String() == "is"
// match.Groups()[2].Name == "2"
// match.Groups()[3].String() == "test"
// match.Groups()[3].Name == "last"
}
```

## Library features that I'm still working on
- Regex split
Expand Down
2 changes: 1 addition & 1 deletion regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
MaintainCaptureOrder = 0x1000 // Maintain named and unnamed capture order
MaintainCaptureOrder = 0x0400 // Maintain named and unnamed capture order
)

func (re *Regexp) RightToLeft() bool {
Expand Down
34 changes: 23 additions & 11 deletions regexp_MaintainCaptureOrder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ func TestMaintainCaptureOrder_Basic(t *testing.T) {
if want, got := `this`, string(m.GroupByName(`first`).Runes()); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `first`, m.regex.GroupNameFromNumber(1); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `testing`, groups[2].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
Expand All @@ -54,13 +57,13 @@ func TestMaintainCaptureOrder_Basic(t *testing.T) {
}
}

func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
r, err := Compile("(?si)(?<first>this).+?\n(testing).+?(?<last>stuff)", MaintainCaptureOrder)
func TestMaintainCaptureOrder_Mode_Not_Enabled(t *testing.T) {
r, err := Compile("(?<first>this).+?(testing).+?(?<last>stuff)", 0)
// t.Logf("code dump: %v", r.code.Dump())
if err != nil {
t.Errorf("unexpected compile err: %v", err)
}
text := "This is a \ntesting stuff"
text := `this is a testing stuff`
m, err := r.FindStringMatch(text)
if err != nil {
t.Errorf("unexpected match err: %v", err)
Expand All @@ -78,16 +81,22 @@ func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
if want, got := text, groups[0].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `This`, groups[1].String(); want != got {
if want, got := `testing`, groups[1].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `first`, groups[1].Name; want != got {
if want, got := `1`, groups[1].Name; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `testing`, groups[2].String(); want != got {
if want, got := `this`, string(m.GroupByName(`first`).Runes()); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `2`, groups[2].Name; want != got {
if want, got := `first`, m.regex.GroupNameFromNumber(2); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `this`, groups[2].String(); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `first`, groups[2].Name; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `stuff`, groups[3].String(); want != got {
Expand All @@ -96,10 +105,13 @@ func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
if want, got := `last`, groups[3].Name; want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
if want, got := `stuff`, string(m.GroupByNumber(3).Runes()); want != got {
t.Fatalf("Wanted '%v'\nGot '%v'", want, got)
}
}

func TestMaintainCaptureOrder_Enable_Inline(t *testing.T) {
r, err := Compile("(?sio)(?<first>this).+?\n(testing).+?(?<last>stuff)", 0)
func TestMaintainCaptureOrder_With_Other_Options(t *testing.T) {
r, err := Compile("(?si)(?<first>this).+?\n(testing).+?(?<last>stuff)", MaintainCaptureOrder)
// t.Logf("code dump: %v", r.code.Dump())
if err != nil {
t.Errorf("unexpected compile err: %v", err)
Expand Down Expand Up @@ -142,8 +154,8 @@ func TestMaintainCaptureOrder_Enable_Inline(t *testing.T) {
}
}

func TestMaintainCaptureOrder_Inline_No_Capture_Groups(t *testing.T) {
r, err := Compile("(?o)this.+?testing.+?stuff", 0)
func TestMaintainCaptureOrder_No_Capture_Groups(t *testing.T) {
r, err := Compile("this.+?testing.+?stuff", MaintainCaptureOrder)
// t.Logf("code dump: %v", r.code.Dump())
if err != nil {
t.Errorf("unexpected compile err: %v", err)
Expand Down
111 changes: 56 additions & 55 deletions syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 compat mode
MaintainCaptureOrder = 0x1000 // "o" Maintain named and unnamed capture order
MaintainCaptureOrder = 0x0400 // Maintain named and unnamed capture order
)

func optionFromCode(ch rune) RegexOptions {
Expand All @@ -44,8 +44,6 @@ func optionFromCode(ch rune) RegexOptions {
return Debug
case 'e', 'E':
return ECMAScript
case 'o', 'O':
return MaintainCaptureOrder
default:
return 0
}
Expand Down Expand Up @@ -242,75 +240,76 @@ func (p *parser) assignNameSlots() {
if len(p.capnamelist) == 0 || p.capnamelist[0] != `0` {
p.capnamelist = append([]string{fmt.Sprint(0)}, p.capnamelist...)
}
} else {
if p.capnames != nil {
for _, name := range p.capnamelist {
for p.isCaptureSlot(p.autocap) {
p.autocap++
}
pos := p.capnames[name]
p.capnames[name] = p.autocap
p.noteCaptureSlot(p.autocap, pos)
return
}

if p.capnames != nil {
for _, name := range p.capnamelist {
for p.isCaptureSlot(p.autocap) {
p.autocap++
}
pos := p.capnames[name]
p.capnames[name] = p.autocap
p.noteCaptureSlot(p.autocap, pos)

p.autocap++
}

// if the caps array has at least one gap, construct the list of used slots
if p.capcount < p.captop {
p.capnumlist = make([]int, p.capcount)
i := 0
}

for k := range p.caps {
p.capnumlist[i] = k
i++
}
// if the caps array has at least one gap, construct the list of used slots
if p.capcount < p.captop {
p.capnumlist = make([]int, p.capcount)
i := 0

sort.Ints(p.capnumlist)
for k := range p.caps {
p.capnumlist[i] = k
i++
}

// merge capsnumlist into capnamelist
if p.capnames != nil || p.capnumlist != nil {
var oldcapnamelist []string
var next int
var k int
sort.Ints(p.capnumlist)
}

if p.capnames == nil {
oldcapnamelist = nil
p.capnames = make(map[string]int)
p.capnamelist = []string{}
next = -1
} else {
oldcapnamelist = p.capnamelist
p.capnamelist = []string{}
next = p.capnames[oldcapnamelist[0]]
}
// merge capsnumlist into capnamelist
if p.capnames != nil || p.capnumlist != nil {
var oldcapnamelist []string
var next int
var k int

for i := 0; i < p.capcount; i++ {
j := i
if p.capnumlist != nil {
j = p.capnumlist[i]
}
if p.capnames == nil {
oldcapnamelist = nil
p.capnames = make(map[string]int)
p.capnamelist = []string{}
next = -1
} else {
oldcapnamelist = p.capnamelist
p.capnamelist = []string{}
next = p.capnames[oldcapnamelist[0]]
}

if next == j {
p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
k++
for i := 0; i < p.capcount; i++ {
j := i
if p.capnumlist != nil {
j = p.capnumlist[i]
}

if k == len(oldcapnamelist) {
next = -1
} else {
next = p.capnames[oldcapnamelist[k]]
}
if next == j {
p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
k++

if k == len(oldcapnamelist) {
next = -1
} else {
//feature: culture?
str := strconv.Itoa(j)
p.capnamelist = append(p.capnamelist, str)
p.capnames[str] = j
}
next = p.capnames[oldcapnamelist[k]]
}

} else {
//feature: culture?
str := strconv.Itoa(j)
p.capnamelist = append(p.capnamelist, str)
p.capnames[str] = j
}
}
}
}

func (p *parser) consumeAutocap() int {
Expand Down Expand Up @@ -958,6 +957,8 @@ func (p *parser) scanGroupOpen() (*regexNode, error) {
}

if capnum != -1 && p.useMaintainCaptureOrder() {
// Successfully scanned a named capture group so we need to increment
// our cap number to maintain the order
p.consumeAutocap()
}
} else if ch == '-' {
Expand Down

0 comments on commit 690cfad

Please sign in to comment.