From 95ba89fe1d371267dcc39e7b4e83c442f235bde9 Mon Sep 17 00:00:00 2001 From: aadesh Date: Wed, 24 Aug 2022 12:52:09 -0400 Subject: [PATCH] [iterator] store last key offset when yielding --- fst_iterator.go | 32 +++--- fst_iterator_test.go | 227 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 242 insertions(+), 17 deletions(-) diff --git a/fst_iterator.go b/fst_iterator.go index b6c58ea..38e0464 100644 --- a/fst_iterator.go +++ b/fst_iterator.go @@ -56,9 +56,8 @@ type Iterator interface { // lexicographic order. Iterators should be constructed with the FSTIterator // method on the parent FST structure. type FSTIterator struct { - f *FST - aut Automaton - maxQ int + f *FST + aut Automaton cache fstIteratorCache @@ -71,7 +70,8 @@ type FSTIterator struct { valsStack []uint64 autStatesStack []int - nextStart []byte + nextStart []byte + lastOffset int } type fstIteratorCache struct { @@ -200,7 +200,9 @@ func (i *FSTIterator) prepare(key []byte) error { continue } - i.maxQ = maxQ + i.nextStart = append(i.nextStart[:0], i.keysStack...) + i.lastOffset = maxQ + return nil } @@ -224,11 +226,12 @@ func (i *FSTIterator) Current() ([]byte, uint64) { // or the advancement goes beyond the configured endKeyExclusive, then // ErrIteratorDone is returned. func (i *FSTIterator) Next() error { - return i.next(i.maxQ, -1) + return i.next(i.lastOffset, -1) } func (i *FSTIterator) Step(maxNodes int) (int, error) { - return i.nextStep(i.maxQ, maxNodes) + s, err := i.nextStep(i.lastOffset, maxNodes) + return s, err } func (i *FSTIterator) next(lastOffset int, maxNodes int) error { @@ -236,17 +239,20 @@ func (i *FSTIterator) next(lastOffset int, maxNodes int) error { return err } -func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) { - // remember where we started +func (i *FSTIterator) prepareForNext() { i.nextStart = append(i.nextStart[:0], i.keysStack...) - i.maxQ = -1 + i.lastOffset = -1 +} +func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) { + // remember where we started nextOffset := lastOffset + 1 iterations := 0 OUTER: for true { if maxNodes > 0 && iterations == maxNodes { + i.lastOffset = nextOffset - 1 return iterations, ErrIteratorYield } @@ -257,6 +263,7 @@ OUTER: if curr.Final() && i.aut.IsMatch(autCurr) && bytes.Compare(i.keysStack, i.nextStart) > 0 { // in final state greater than start key + i.prepareForNext() return iterations, nil } @@ -276,6 +283,7 @@ OUTER: // push onto stack next, err := i.stateGet(nextAddr) if err != nil { + i.prepareForNext() return iterations, err } @@ -288,6 +296,7 @@ OUTER: // check to see if new keystack might have gone too far if i.endKeyExclusive != nil && bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { + i.prepareForNext() return iterations, ErrIteratorDone } @@ -316,6 +325,7 @@ OUTER: i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] } + i.prepareForNext() return iterations, ErrIteratorDone } @@ -336,7 +346,7 @@ func (i *FSTIterator) seek(key []byte) error { if !i.statesStack[len(i.statesStack)-1].Final() || !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || bytes.Compare(i.keysStack, key) < 0 { - return i.next(i.maxQ, -1) + return i.next(i.lastOffset, -1) } return nil diff --git a/fst_iterator_test.go b/fst_iterator_test.go index 00ca36a..92cc0d4 100644 --- a/fst_iterator_test.go +++ b/fst_iterator_test.go @@ -663,7 +663,7 @@ func TestRegexpSearch(t *testing.T) { } } -func TestIteratorRegexpLazySearchNext(t *testing.T) { +func TestIterator_Regexp_LazySearch(t *testing.T) { var buf bytes.Buffer b, err := New(&buf, nil) if err != nil { @@ -705,24 +705,239 @@ func TestIteratorRegexpLazySearchNext(t *testing.T) { t.Fatalf("error expected lazily initialized search") } - _, err = itr.Step(1) + niterations, err := itr.Step(1) + if niterations != 1 { + t.Fatalf("error expected only one node iteration") + } + if err != ErrIteratorYield { t.Fatalf("error expected yield but received: %v", err) } - _, err = itr.Step(20) + for { + niterations, err = itr.Step(1) + if niterations != 1 { + t.Fatalf("error expected only one node iteration") + } + + if err != nil { + if err == ErrIteratorYield { + continue + } else if err == ErrIteratorDone { + break + } + + t.Fatalf("unexpected iterator step error: %v", err) + } + + key, val := itr.Current() + got[string(key)] = val + } + + if !reflect.DeepEqual(want, got) { + t.Errorf("expected %v, got: %v", want, got) + } +} + +func TestIterator_Regexp_LazySearch_Next(t *testing.T) { + var buf bytes.Buffer + b, err := New(&buf, nil) + if err != nil { + t.Fatalf("error creating builder: %v", err) + } + + err = insertStringMap(b, smallSample) + if err != nil { + t.Fatalf("error building: %v", err) + } + + err = b.Close() + if err != nil { + t.Fatalf("error closing: %v", err) + } + + fst, err := Load(buf.Bytes()) + if err != nil { + t.Fatalf("error loading set: %v", err) + } + + r, err := regexp.New(`.*ur.*`) + if err != nil { + t.Fatalf("error building regexp automaton: %v", err) + } + + want := map[string]uint64{ + "thurs": 5, + } + + got := map[string]uint64{} + itr, err := fst.LazySearch(r, nil, nil) if err != nil { - t.Fatalf("iterator error: %v", err) + t.Fatalf("error creating fst iterator: %v", err) + } + + err = itr.Next() + if err != nil { + t.Fatalf("error iterating next: %v", err) } key, val := itr.Current() got[string(key)] = val + + err = itr.Next() + if err != ErrIteratorDone { + t.Fatalf("error expected done: %v", err) + } + + if !reflect.DeepEqual(want, got) { + t.Errorf("expected %v, got: %v", want, got) + } +} + +func TestIterator_Regexp_LazySearch_Step(t *testing.T) { + var buf bytes.Buffer + b, err := New(&buf, nil) + if err != nil { + t.Fatalf("error creating builder: %v", err) + } + + err = insertStringMap(b, smallSample) + if err != nil { + t.Fatalf("error building: %v", err) + } + + err = b.Close() + if err != nil { + t.Fatalf("error closing: %v", err) + } + + fst, err := Load(buf.Bytes()) + if err != nil { + t.Fatalf("error loading set: %v", err) + } + + r, err := regexp.New(`.*u.*`) + if err != nil { + t.Fatalf("error building regexp automaton: %v", err) + } + + want := map[string]uint64{ + "tues": 3, + "thurs": 5, + } + + got := map[string]uint64{} + itr, err := fst.LazySearch(r, nil, nil) + if err != nil { + t.Fatalf("error creating fst iterator: %v", err) + } + + niterations, err := itr.Step(1) + if niterations != 1 { + t.Fatalf("error expected only one node iteration") + } + + if err != ErrIteratorYield { + t.Fatalf("error expected yield but received: %v", err) + } + + for { + niterations, err = itr.Step(1) + if niterations != 1 { + t.Fatalf("error expected only one node iteration") + } + + if err != nil { + if err == ErrIteratorYield { + continue + } else if err == ErrIteratorDone { + break + } + + t.Fatalf("unexpected iterator step error: %v", err) + } + + key, val := itr.Current() + got[string(key)] = val + } + if !reflect.DeepEqual(want, got) { t.Errorf("expected %v, got: %v", want, got) } +} + +func TestIterator_Regexp_LazySearch_LargeStep(t *testing.T) { + var buf bytes.Buffer + b, err := New(&buf, nil) + if err != nil { + t.Fatalf("error creating builder: %v", err) + } + + err = insertStringMap(b, smallSample) + if err != nil { + t.Fatalf("error building: %v", err) + } + + err = b.Close() + if err != nil { + t.Fatalf("error closing: %v", err) + } + + fst, err := Load(buf.Bytes()) + if err != nil { + t.Fatalf("error loading set: %v", err) + } + + r, err := regexp.New(`.*u.*`) + if err != nil { + t.Fatalf("error building regexp automaton: %v", err) + } + + want := map[string]uint64{ + "tues": 3, + "thurs": 5, + } + + got := map[string]uint64{} + itr, err := fst.LazySearch(r, nil, nil) + if err != nil { + t.Fatalf("error creating fst iterator: %v", err) + } + + niterations, err := itr.Step(50) + if niterations > 50 { + t.Fatalf("error expected only one node iteration") + } + + if err != nil { + t.Fatalf("error iterating: %v", err) + } + + key, val := itr.Current() + got[string(key)] = val + + niterations, err = itr.Step(50) + if niterations > 50 { + t.Fatalf("error expected only one node iteration") + } + + if err != nil { + t.Fatalf("error iterating: %v", err) + } + + key, val = itr.Current() + got[string(key)] = val + + niterations, err = itr.Step(50) + if niterations > 50 { + t.Fatalf("error expected only one node iteration") + } - _, err = itr.Step(20) if err != ErrIteratorDone { - t.Fatalf("iterator error: %v", err) + t.Fatalf("error iterating, expected done: %v", err) + } + + if !reflect.DeepEqual(want, got) { + t.Errorf("expected %v, got: %v", want, got) } }