Skip to content

Commit

Permalink
[iterator] store last key offset when yielding
Browse files: browse the repository at this point in the history
  • Loading branch information
aadesh committed Aug 24, 2022
1 parent b15db8c commit 95ba89f
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 17 deletions.
32 changes: 21 additions & 11 deletions fst_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,8 @@ type Iterator interface {
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
type FSTIterator struct {
f *FST
aut Automaton
maxQ int
f *FST
aut Automaton

cache fstIteratorCache

Expand All @@ -71,7 +70,8 @@ type FSTIterator struct {
valsStack []uint64
autStatesStack []int

nextStart []byte
nextStart []byte
lastOffset int
}

type fstIteratorCache struct {
Expand Down Expand Up @@ -200,7 +200,9 @@ func (i *FSTIterator) prepare(key []byte) error {
continue
}

i.maxQ = maxQ
i.nextStart = append(i.nextStart[:0], i.keysStack...)
i.lastOffset = maxQ

return nil
}

Expand All @@ -224,29 +226,33 @@ func (i *FSTIterator) Current() ([]byte, uint64) {
// or the advancement goes beyond the configured endKeyExclusive, then
// ErrIteratorDone is returned.
func (i *FSTIterator) Next() error {
	// Resume from the offset saved by the previous step: nextStep stores it in
	// lastOffset when it yields, so a walk paused by Step continues from the
	// same place. The -1 budget disables the maxNodes yield check in nextStep.
	return i.next(i.lastOffset, -1)
}

// Step advances the iterator from the saved lastOffset with a budget of
// maxNodes iterations and returns the iteration count reported by nextStep
// together with its error. When maxNodes is positive and that many iterations
// pass without reaching a result, nextStep saves the current offset and
// returns ErrIteratorYield; calling Step or Next again resumes from there.
func (i *FSTIterator) Step(maxNodes int) (int, error) {
	return i.nextStep(i.lastOffset, maxNodes)
}

// next runs nextStep from lastOffset with the given node budget and reports
// only the resulting error; the iteration count is discarded.
func (i *FSTIterator) next(lastOffset int, maxNodes int) error {
	if _, err := i.nextStep(lastOffset, maxNodes); err != nil {
		return err
	}
	return nil
}

func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) {
// remember where we started
func (i *FSTIterator) prepareForNext() {
i.nextStart = append(i.nextStart[:0], i.keysStack...)
i.maxQ = -1
i.lastOffset = -1
}

func (i *FSTIterator) nextStep(lastOffset int, maxNodes int) (int, error) {
// remember where we started
nextOffset := lastOffset + 1
iterations := 0

OUTER:
for true {
if maxNodes > 0 && iterations == maxNodes {
i.lastOffset = nextOffset - 1
return iterations, ErrIteratorYield
}

Expand All @@ -257,6 +263,7 @@ OUTER:
if curr.Final() && i.aut.IsMatch(autCurr) &&
bytes.Compare(i.keysStack, i.nextStart) > 0 {
// in final state greater than start key
i.prepareForNext()
return iterations, nil
}

Expand All @@ -276,6 +283,7 @@ OUTER:
// push onto stack
next, err := i.stateGet(nextAddr)
if err != nil {
i.prepareForNext()
return iterations, err
}

Expand All @@ -288,6 +296,7 @@ OUTER:
// check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
i.prepareForNext()
return iterations, ErrIteratorDone
}

Expand Down Expand Up @@ -316,6 +325,7 @@ OUTER:
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
}

i.prepareForNext()
return iterations, ErrIteratorDone
}

Expand All @@ -336,7 +346,7 @@ func (i *FSTIterator) seek(key []byte) error {
if !i.statesStack[len(i.statesStack)-1].Final() ||
!i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) ||
bytes.Compare(i.keysStack, key) < 0 {
return i.next(i.maxQ, -1)
return i.next(i.lastOffset, -1)
}

return nil
Expand Down
227 changes: 221 additions & 6 deletions fst_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ func TestRegexpSearch(t *testing.T) {
}
}

func TestIteratorRegexpLazySearchNext(t *testing.T) {
func TestIterator_Regexp_LazySearch(t *testing.T) {
var buf bytes.Buffer
b, err := New(&buf, nil)
if err != nil {
Expand Down Expand Up @@ -705,24 +705,239 @@ func TestIteratorRegexpLazySearchNext(t *testing.T) {
t.Fatalf("error expected lazily initialized search")
}

_, err = itr.Step(1)
niterations, err := itr.Step(1)
if niterations != 1 {
t.Fatalf("error expected only one node iteration")
}

if err != ErrIteratorYield {
t.Fatalf("error expected yield but received: %v", err)
}

_, err = itr.Step(20)
for {
niterations, err = itr.Step(1)
if niterations != 1 {
t.Fatalf("error expected only one node iteration")
}

if err != nil {
if err == ErrIteratorYield {
continue
} else if err == ErrIteratorDone {
break
}

t.Fatalf("unexpected iterator step error: %v", err)
}

key, val := itr.Current()
got[string(key)] = val
}

if !reflect.DeepEqual(want, got) {
t.Errorf("expected %v, got: %v", want, got)
}
}

// TestIterator_Regexp_LazySearch_Next drives a lazily initialized regexp
// search using Next only: the first call must land on the single expected
// match and the second call must report ErrIteratorDone.
func TestIterator_Regexp_LazySearch_Next(t *testing.T) {
	var buf bytes.Buffer
	b, err := New(&buf, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}

	err = insertStringMap(b, smallSample)
	if err != nil {
		t.Fatalf("error building: %v", err)
	}

	err = b.Close()
	if err != nil {
		t.Fatalf("error closing: %v", err)
	}

	fst, err := Load(buf.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	r, err := regexp.New(`.*ur.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	want := map[string]uint64{
		"thurs": 5,
	}

	got := map[string]uint64{}
	itr, err := fst.LazySearch(r, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	// First Next positions the iterator on the only matching key.
	err = itr.Next()
	if err != nil {
		t.Fatalf("error iterating next: %v", err)
	}

	key, val := itr.Current()
	got[string(key)] = val

	// No further match is expected, so the iterator must be exhausted.
	err = itr.Next()
	if err != ErrIteratorDone {
		t.Fatalf("error expected done: %v", err)
	}

	if !reflect.DeepEqual(want, got) {
		t.Errorf("expected %v, got: %v", want, got)
	}
}

// TestIterator_Regexp_LazySearch_Step walks a lazily initialized regexp
// search with a budget of one iteration per Step call, collecting every key
// the iterator stops on until it reports ErrIteratorDone.
func TestIterator_Regexp_LazySearch_Step(t *testing.T) {
	var out bytes.Buffer
	builder, err := New(&out, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}
	if err = insertStringMap(builder, smallSample); err != nil {
		t.Fatalf("error building: %v", err)
	}
	if err = builder.Close(); err != nil {
		t.Fatalf("error closing: %v", err)
	}

	loaded, err := Load(out.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	pattern, err := regexp.New(`.*u.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	expected := map[string]uint64{"tues": 3, "thurs": 5}
	seen := make(map[string]uint64)

	it, err := loaded.LazySearch(pattern, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	// The very first single-iteration step is expected to yield.
	steps, err := it.Step(1)
	if steps != 1 {
		t.Fatalf("error expected only one node iteration")
	}
	if err != ErrIteratorYield {
		t.Fatalf("error expected yield but received: %v", err)
	}

	for {
		steps, err = it.Step(1)
		if steps != 1 {
			t.Fatalf("error expected only one node iteration")
		}

		// A yield means "keep stepping"; done ends the walk; anything else
		// that is non-nil is a failure.
		if err == ErrIteratorYield {
			continue
		}
		if err == ErrIteratorDone {
			break
		}
		if err != nil {
			t.Fatalf("unexpected iterator step error: %v", err)
		}

		k, v := it.Current()
		seen[string(k)] = v
	}

	if !reflect.DeepEqual(expected, seen) {
		t.Errorf("expected %v, got: %v", expected, seen)
	}
}

// TestIterator_Regexp_LazySearch_LargeStep steps a lazily initialized regexp
// search with a budget of 50 iterations per call. Each of the first two calls
// is expected to stop on a match without yielding, and the third call is
// expected to report ErrIteratorDone.
func TestIterator_Regexp_LazySearch_LargeStep(t *testing.T) {
	var buf bytes.Buffer
	b, err := New(&buf, nil)
	if err != nil {
		t.Fatalf("error creating builder: %v", err)
	}

	err = insertStringMap(b, smallSample)
	if err != nil {
		t.Fatalf("error building: %v", err)
	}

	err = b.Close()
	if err != nil {
		t.Fatalf("error closing: %v", err)
	}

	fst, err := Load(buf.Bytes())
	if err != nil {
		t.Fatalf("error loading set: %v", err)
	}

	r, err := regexp.New(`.*u.*`)
	if err != nil {
		t.Fatalf("error building regexp automaton: %v", err)
	}

	want := map[string]uint64{
		"tues":  3,
		"thurs": 5,
	}

	got := map[string]uint64{}
	itr, err := fst.LazySearch(r, nil, nil)
	if err != nil {
		t.Fatalf("error creating fst iterator: %v", err)
	}

	const budget = 50

	// One step per expected match: each must finish within the budget and
	// leave the iterator positioned on a key.
	for n := 0; n < len(want); n++ {
		niterations, err := itr.Step(budget)
		if niterations > budget {
			t.Fatalf("error expected at most %d node iterations, got %d", budget, niterations)
		}

		if err != nil {
			t.Fatalf("error iterating: %v", err)
		}

		key, val := itr.Current()
		got[string(key)] = val
	}

	// The step after the last match must exhaust the iterator. Its error is
	// checked directly; no further Step call may overwrite it.
	niterations, err := itr.Step(budget)
	if niterations > budget {
		t.Fatalf("error expected at most %d node iterations, got %d", budget, niterations)
	}

	if err != ErrIteratorDone {
		t.Fatalf("error iterating, expected done: %v", err)
	}

	if !reflect.DeepEqual(want, got) {
		t.Errorf("expected %v, got: %v", want, got)
	}
}

0 comments on commit 95ba89f

Please sign in to comment.