From 20b62d8ec9b79936c72bfdfc1f35b241120e9594 Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Tue, 28 May 2024 13:07:12 -0300 Subject: [PATCH 1/3] More time to server.Serve goroutine to advance We might be too busy polling the connection state and that could be preventing the server goroutine to advance. That's a guess: this error has proved to be very hard to reproduce locally to put confidence on the actual cause. --- wsl-pro-service/internal/daemon/daemon_test.go | 9 ++++----- wsl-pro-service/internal/streams/server_test.go | 8 ++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/wsl-pro-service/internal/daemon/daemon_test.go b/wsl-pro-service/internal/daemon/daemon_test.go index 394fcde81..7c7c2bf75 100644 --- a/wsl-pro-service/internal/daemon/daemon_test.go +++ b/wsl-pro-service/internal/daemon/daemon_test.go @@ -179,8 +179,7 @@ func TestServe(t *testing.T) { return systemd.gotState.Load() == "STATUS=Connected" }, 30*time.Second, time.Second, "Systemd never switched states to 'Connected'") - require.Eventually(t, agent.Service.AllConnected, - 30*time.Second, time.Second, "The daemon should have connected to the Windows Agent") + require.Eventually(t, agent.Service.AllConnected, 30*time.Second, time.Second, "The daemon should have connected to the Windows Agent") require.Eventually(t, func() bool { conOk := len(agent.Service.Connect.History()) > 0 @@ -293,7 +292,7 @@ func TestServeAndQuit(t *testing.T) { require.False(t, systemd.gotUnsetEnvironment.Load(), "Unexpected value sent by Daemon to systemd notifier's unsetEnvironment") - require.Eventually(t, agent.Service.AllConnected, 10*time.Second, 100*time.Millisecond, "Daemon never connected to agent's service") + require.Eventually(t, agent.Service.AllConnected, 10*time.Second, 500*time.Millisecond, "Daemon never connected to agent's service") } d.Quit(ctx, tc.quitForcefully) @@ -372,7 +371,7 @@ func TestReconnection(t *testing.T) { return systemd.gotState.Load() == "STATUS=Connected" }, maxTimeout, time.Second, "Service should have set systemd state to Connected") - require.Eventually(t, agent.Service.AllConnected, 10*time.Second, 100*time.Millisecond, "Daemon never connected to agent's service") + require.Eventually(t, agent.Service.AllConnected, 10*time.Second, 500*time.Millisecond, "Daemon never connected to agent's service") if tc.firstConnectionLong { // "Long-lived" means longer than a minute @@ -389,7 +388,7 @@ func TestReconnection(t *testing.T) { agent = testutils.NewMockWindowsAgent(t, ctx, publicDir) defer agent.Stop() - require.Eventually(t, agent.Service.AllConnected, 20*time.Second, 100*time.Millisecond, "Daemon never connected to agent's service") + require.Eventually(t, agent.Service.AllConnected, 20*time.Second, 500*time.Millisecond, "Daemon never connected to agent's service") require.EqualValues(t, 1, systemd.readyNotifications.Load(), "Service should have notified systemd after connecting to the control stream") }) } diff --git a/wsl-pro-service/internal/streams/server_test.go b/wsl-pro-service/internal/streams/server_test.go index 8b1fb7ee4..21fc02e2c 100644 --- a/wsl-pro-service/internal/streams/server_test.go +++ b/wsl-pro-service/internal/streams/server_test.go @@ -38,9 +38,7 @@ func TestServe(t *testing.T) { // Test handshake - require.Eventually(t, func() bool { - return agent.Service.AllConnected() - }, 20*time.Second, 100*time.Millisecond, "Setup: Agent service never became ready") + require.Eventually(t, agent.Service.AllConnected, 20*time.Second, 500*time.Millisecond, "Setup: Agent service never became ready") // Test receiving a pro token and returning success err = agent.Service.ProAttachment.Send(&agentapi.ProAttachCmd{Token: "token345"}) @@ -113,9 +111,7 @@ func TestStop(t *testing.T) { close(errCh) }() - require.Eventually(t, func() bool { - return agent.Service.AllConnected() - }, 20*time.Second, 100*time.Millisecond, "Setup: Agent service never became ready") + require.Eventually(t, agent.Service.AllConnected, 20*time.Second, 500*time.Millisecond, "Setup: Agent service never became ready") ctx, cancel := context.WithCancel(ctx) defer cancel() From ded0d383fad63a5c9a48337bd3a563750283291a Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Tue, 28 May 2024 14:21:41 -0300 Subject: [PATCH 2/3] s.setBlocking race with the Apply* methods As the handlers that invoke the Apply* methods run in other goroutines even though the service.setBlocking is called before issuings RPCs that will eventually be handled by the mockService, it may happen that the goroutines could be running in different threads and thus have no visibility of the changes to the s.blockingCalls flag. Either using atomics or a RWMutex should suffice and make the test future proof, as we may decide to change the order of the calls for other reasons. --- wsl-pro-service/internal/streams/server_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/wsl-pro-service/internal/streams/server_test.go b/wsl-pro-service/internal/streams/server_test.go index 21fc02e2c..ca6a384a5 100644 --- a/wsl-pro-service/internal/streams/server_test.go +++ b/wsl-pro-service/internal/streams/server_test.go @@ -3,6 +3,7 @@ package streams_test import ( "context" "errors" + "sync" "testing" "time" @@ -138,11 +139,15 @@ func TestStop(t *testing.T) { type mockService struct { blockingCalls bool + mu sync.RWMutex ctx context.Context } func (s *mockService) setBlocking(ctx context.Context) { + s.mu.Lock() + defer s.mu.Unlock() + s.blockingCalls = true s.ctx = ctx } @@ -153,6 +158,9 @@ func (s *mockService) ApplyProToken(ctx context.Context, msg *agentapi.ProAttach } // Mock a slow task that can be cancelled + // Using a mutex because those calls can race with s.setBlocking. + s.mu.RLock() + defer s.mu.RUnlock() if s.blockingCalls { select { case <-ctx.Done(): @@ -172,6 +180,9 @@ func (s *mockService) ApplyLandscapeConfig(ctx context.Context, msg *agentapi.La } // Mock a slow task that can be cancelled + // Using a mutex because those calls can race with s.setBlocking. + s.mu.RLock() + defer s.mu.RUnlock() if s.blockingCalls { select { case <-ctx.Done(): From 91b0188508977f1c09866d779323cced752cde7c Mon Sep 17 00:00:00 2001 From: Carlos Nihelton Date: Tue, 28 May 2024 17:54:17 -0300 Subject: [PATCH 3/3] Passant cleaning some whitespaces --- wsl-pro-service/internal/streams/server_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/wsl-pro-service/internal/streams/server_test.go b/wsl-pro-service/internal/streams/server_test.go index ca6a384a5..7bded6a33 100644 --- a/wsl-pro-service/internal/streams/server_test.go +++ b/wsl-pro-service/internal/streams/server_test.go @@ -38,7 +38,6 @@ func TestServe(t *testing.T) { }() // Test handshake - require.Eventually(t, agent.Service.AllConnected, 20*time.Second, 500*time.Millisecond, "Setup: Agent service never became ready") // Test receiving a pro token and returning success @@ -51,7 +50,6 @@ func TestServe(t *testing.T) { require.Empty(t, agent.Service.ProAttachment.History()[1].GetResult(), "ProAttachment should return a successful result") // Test receiving a pro token and returning error - err = agent.Service.ProAttachment.Send(&agentapi.ProAttachCmd{Token: "HARDCODED_FAILURE"}) require.NoError(t, err, "Send should return no error") @@ -61,7 +59,6 @@ func TestServe(t *testing.T) { require.NotEmpty(t, agent.Service.ProAttachment.History()[2].GetResult(), "ProAttachment should return an error result") // Test receiving a Landscape config and returning success - err = agent.Service.LandscapeConfig.Send(&agentapi.LandscapeConfigCmd{Config: "hello=world"}) require.NoError(t, err, "Send should return no error") @@ -71,7 +68,6 @@ func TestServe(t *testing.T) { require.Empty(t, agent.Service.LandscapeConfig.History()[1].GetResult(), "LandscapeConfig should return a successful result") // Test receiving a Landscape config and returning error - err = agent.Service.LandscapeConfig.Send(&agentapi.LandscapeConfigCmd{Config: "HARDCODED_FAILURE"}) require.NoError(t, err, "Send should return no error")