Skip to content

Commit

Permalink
Fix flaky test: TestPartitionInstanceLifecycler
Browse files Browse the repository at this point in the history
Fixes #572

Increase polling timeout. 1s seems tight
  • Loading branch information
julienduchesne committed Oct 8, 2024
1 parent 7464c1e commit 91ca849
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 27 deletions.
31 changes: 5 additions & 26 deletions kv/memberlist/memberlist_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"math"
"math/rand"
"net"
"reflect"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -1243,24 +1242,24 @@ func TestRejoin(t *testing.T) {
require.NoError(t, services.StartAndAwaitRunning(context.Background(), mkv2))
defer services.StopAndAwaitTerminated(context.Background(), mkv2) //nolint:errcheck

membersFunc := func() interface{} {
return mkv2.memberlist.NumMembers()
expectMembers := func(expected int) func() bool {
return func() bool { return mkv2.memberlist.NumMembers() == expected }
}

poll(t, 7*time.Second, 2, membersFunc) // Probe interval is 5s, with 2s timeout, so probe for 7s.
require.Eventually(t, expectMembers(2), 10*time.Second, 100*time.Millisecond, "expected 2 members in the cluster")

// Shutdown first KV
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), mkv1))

// Second KV should see single member now.
poll(t, 7*time.Second, 1, membersFunc)
require.Eventually(t, expectMembers(1), 10*time.Second, 100*time.Millisecond, "expected 1 member in the cluster")

// Let's start first KV again. It is not configured to join the cluster, but KV2 is rejoining.
mkv1 = NewKV(cfg1, log.NewNopLogger(), &dnsProviderMock{}, prometheus.NewPedanticRegistry())
require.NoError(t, services.StartAndAwaitRunning(context.Background(), mkv1))
defer services.StopAndAwaitTerminated(context.Background(), mkv1) //nolint:errcheck

poll(t, 7*time.Second, 2, membersFunc)
require.Eventually(t, expectMembers(2), 10*time.Second, 100*time.Millisecond, "expected 2 member in the cluster")
}

func TestMessageBuffer(t *testing.T) {
Expand Down Expand Up @@ -1601,26 +1600,6 @@ func getOrCreateData(in interface{}) *data {
return r
}

// poll repeatedly evaluates condition until we either timeout, or it succeeds.
func poll(t testing.TB, d time.Duration, want interface{}, have func() interface{}) {
t.Helper()

deadline := time.Now().Add(d)
for {
if time.Now().After(deadline) {
break
}
if reflect.DeepEqual(want, have()) {
return
}
time.Sleep(d / 100)
}
h := have()
if !reflect.DeepEqual(want, h) {
t.Fatalf("expected %v, got %v", want, h)
}
}

type testLogger struct {
}

Expand Down
2 changes: 1 addition & 1 deletion ring/partition_instance_lifecycler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func TestPartitionInstanceLifecycler(t *testing.T) {
assert.Eventually(t, func() bool {
actual := getPartitionRingFromStore(t, store, ringKey)
return actual.Partitions[1].State == PartitionActive
}, time.Second, eventuallyTick)
}, 2*lifecycler1aConfig.PollingInterval, eventuallyTick)
})

t.Run("should wait for the configured minimum waiting time before switching a pending partition to active", func(t *testing.T) {
Expand Down

0 comments on commit 91ca849

Please sign in to comment.