From fe4a38f0a19750ace3e8fc2ec21e89892dd0e672 Mon Sep 17 00:00:00 2001
From: kegsay
Date: Fri, 20 Oct 2023 10:07:14 +0100
Subject: [PATCH] Make dirty runs dirtier (#683)

* Make dirty runs dirtier

Previously, `Deploy(t, 1)` would deploy 1 HS in 1 isolated blueprint,
which could be reused by other tests that also call `Deploy(t, 1)`.
This pattern extended to `Deploy(t, 2)`, which would deploy 2x HSes in
1 isolated blueprint.

This, however, meant we'd run more containers in parallel. What's worse,
these containers would only be cleaned up at the end of the entire test
package run: `defer deployment.Destroy(t)` would do nothing.

This was Very Bad on GHA boxes as they have limited resources, causing
synapse/workers to fail to complete.

* Increase timeouts for GHA boxes

* Add more timeouts for GHA boxes

* More increased timeouts for GHA
---
 internal/docker/deployer.go                    | 48 ++++++++++++
 test_package.go                                | 78 +++++++++++++------
 tests/csapi/apidoc_room_state_test.go          |  2 +-
 ...federation_room_join_partial_state_test.go  |  6 +-
 4 files changed, 107 insertions(+), 27 deletions(-)

diff --git a/internal/docker/deployer.go b/internal/docker/deployer.go
index 6af22d74..96e6378b 100644
--- a/internal/docker/deployer.go
+++ b/internal/docker/deployer.go
@@ -76,6 +76,54 @@ func (d *Deployer) log(str string, args ...interface{}) {
 	log.Printf(str, args...)
 }
 
+// CreateDirtyServer creates a new dirty server on the dirty network, creating the network if needed.
+// This homeserver should be added to the dirty deployment. The hsName should start as 'hs1', then
+// 'hs2' ... 'hsN'.
+func (d *Deployer) CreateDirtyServer(hsName string) (*HomeserverDeployment, error) {
+	networkName, err := createNetworkIfNotExists(d.Docker, d.config.PackageNamespace, "dirty")
+	if err != nil {
+		return nil, fmt.Errorf("CreateDirtyServer: %w", err)
+	}
+	baseImageURI := d.config.BaseImageURI
+	// Use HS specific base image if defined
+	if uri, ok := d.config.BaseImageURIs[hsName]; ok {
+		baseImageURI = uri
+	}
+
+	hsDeployment, err := deployImage(
+		d.Docker, baseImageURI, fmt.Sprintf("complement_%s_dirty_%s", d.config.PackageNamespace, hsName),
+		d.config.PackageNamespace, "", hsName, nil, "dirty",
+		networkName, d.config,
+	)
+	if err != nil {
+		if hsDeployment != nil && hsDeployment.ContainerID != "" {
+			// print logs to help debug
+			printLogs(d.Docker, hsDeployment.ContainerID, "dirty")
+		}
+		return nil, fmt.Errorf("CreateDirtyServer: Failed to deploy image %v : %w", baseImageURI, err)
+	}
+	return hsDeployment, nil
+}
+
+// CreateDirtyDeployment creates a dirty deployment with a single fresh HS (hs1) and no blueprints.
+// More HSes can be added later via CreateDirtyServer().
+func (d *Deployer) CreateDirtyDeployment() (*Deployment, error) {
+	hsName := "hs1"
+	hsDeployment, err := d.CreateDirtyServer(hsName)
+	if err != nil {
+		return nil, err
+	}
+	// assign the HS to the deployment
+	return &Deployment{
+		Deployer: d,
+		Dirty:    true,
+		HS: map[string]*HomeserverDeployment{
+			hsName: hsDeployment,
+		},
+		Config: d.config,
+	}, nil
+}
+
 func (d *Deployer) Deploy(ctx context.Context, blueprintName string) (*Deployment, error) {
 	dep := &Deployment{
 		Deployer: d,
diff --git a/test_package.go b/test_package.go
index 0003120c..ab06f14a 100644
--- a/test_package.go
+++ b/test_package.go
@@ -53,8 +53,8 @@ type TestPackage struct {
 
 	// pointers to existing deployments for Deploy(t, 1) style deployments which are reused when run
 	// in dirty mode.
-	existingDeployments   map[int]*docker.Deployment
-	existingDeploymentsMu *sync.Mutex
+	existingDeployment   *docker.Deployment
+	existingDeploymentMu *sync.Mutex
 }
 
 // NewTestPackage creates a new test package which can be used to deploy containers for all tests
@@ -75,21 +75,20 @@ func NewTestPackage(pkgNamespace string) (*TestPackage, error) {
 	logrus.SetLevel(logrus.ErrorLevel)
 
 	return &TestPackage{
-		complementBuilder:     builder,
-		namespaceCounter:      0,
-		Config:                cfg,
-		existingDeployments:   make(map[int]*docker.Deployment),
-		existingDeploymentsMu: &sync.Mutex{},
+		complementBuilder:    builder,
+		namespaceCounter:     0,
+		Config:               cfg,
+		existingDeploymentMu: &sync.Mutex{},
 	}, nil
 }
 
 func (tp *TestPackage) Cleanup() {
 	// any dirty deployments need logs printed and post scripts run
-	tp.existingDeploymentsMu.Lock()
-	for _, dep := range tp.existingDeployments {
-		dep.DestroyAtCleanup()
+	tp.existingDeploymentMu.Lock()
+	if tp.existingDeployment != nil {
+		tp.existingDeployment.DestroyAtCleanup()
 	}
-	tp.existingDeploymentsMu.Unlock()
+	tp.existingDeploymentMu.Unlock()
 	tp.complementBuilder.Cleanup()
 }
 
@@ -120,13 +119,9 @@ func (tp *TestPackage) OldDeploy(t *testing.T, blueprint b.Blueprint) Deployment
 func (tp *TestPackage) Deploy(t *testing.T, numServers int) Deployment {
 	t.Helper()
 	if tp.Config.EnableDirtyRuns {
-		tp.existingDeploymentsMu.Lock()
-		existingDep := tp.existingDeployments[numServers]
-		tp.existingDeploymentsMu.Unlock()
-		if existingDep != nil {
-			return existingDep
-		}
+		return tp.dirtyDeploy(t, numServers)
 	}
+	// non-dirty deployments below
 	blueprint := mapServersToBlueprint(numServers)
 	timeStartBlueprint := time.Now()
 	if err := tp.complementBuilder.ConstructBlueprintIfNotExist(blueprint); err != nil {
@@ -143,15 +138,52 @@ func (tp *TestPackage) Deploy(t *testing.T, numServers int) Deployment {
 		t.Fatalf("Deploy: Deploy returned error %s", err)
 	}
 	t.Logf("Deploy times: %v blueprints, %v containers", timeStartDeploy.Sub(timeStartBlueprint), time.Since(timeStartDeploy))
-	if tp.Config.EnableDirtyRuns {
-		dep.Dirty = true // stop this deployment being destroyed.
-		tp.existingDeploymentsMu.Lock()
-		tp.existingDeployments[numServers] = dep
-		tp.existingDeploymentsMu.Unlock()
-	}
 	return dep
 }
 
+func (tp *TestPackage) dirtyDeploy(t *testing.T, numServers int) Deployment {
+	tp.existingDeploymentMu.Lock()
+	defer tp.existingDeploymentMu.Unlock()
+	// do we even have a deployment?
+	if tp.existingDeployment == nil {
+		d, err := docker.NewDeployer("dirty", tp.complementBuilder.Config)
+		if err != nil {
+			t.Fatalf("dirtyDeploy: NewDeployer returned error %s", err)
+		}
+		// this creates a single hs1
+		tp.existingDeployment, err = d.CreateDirtyDeployment()
+		if err != nil {
+			t.Fatalf("CreateDirtyDeployment failed: %s", err)
+		}
+	}
+
+	// if we have an existing deployment, can we use it? We can use it if we have at least that number of servers deployed already.
+	if len(tp.existingDeployment.HS) >= numServers {
+		return tp.existingDeployment
+	}
+
+	// we need to scale up the dirty deployment to more servers
+	d, err := docker.NewDeployer("dirty", tp.complementBuilder.Config)
+	if err != nil {
+		t.Fatalf("dirtyDeploy: NewDeployer returned error %s", err)
+	}
+	for i := 1; i <= numServers; i++ {
+		hsName := fmt.Sprintf("hs%d", i)
+		_, ok := tp.existingDeployment.HS[hsName]
+		if ok {
+			continue
+		}
+		// scale up
+		hsDep, err := d.CreateDirtyServer(hsName)
+		if err != nil {
+			t.Fatalf("dirtyDeploy: failed to add %s: %s", hsName, err)
+		}
+		tp.existingDeployment.HS[hsName] = hsDep
+	}
+
+	return tp.existingDeployment
+}
+
 // converts the requested number of servers into a single blueprint, which can be deployed using normal blueprint machinery.
 func mapServersToBlueprint(numServers int) b.Blueprint {
 	servers := make([]b.Homeserver, numServers)
diff --git a/tests/csapi/apidoc_room_state_test.go b/tests/csapi/apidoc_room_state_test.go
index 308e0f0f..68489bd8 100644
--- a/tests/csapi/apidoc_room_state_test.go
+++ b/tests/csapi/apidoc_room_state_test.go
@@ -108,7 +108,7 @@ func TestRoomState(t *testing.T) {
 			})
 
 			authedClient.MustDo(t, "GET", []string{"_matrix", "client", "v3", "publicRooms"},
-				client.WithRetryUntil(time.Second, func(res *http.Response) bool {
+				client.WithRetryUntil(3*time.Second, func(res *http.Response) bool {
 					foundRoom := false
 					must.MatchResponse(t, res, match.HTTPResponse{
diff --git a/tests/msc3902/federation_room_join_partial_state_test.go b/tests/msc3902/federation_room_join_partial_state_test.go
index 2ce6c420..4c7c8ccf 100644
--- a/tests/msc3902/federation_room_join_partial_state_test.go
+++ b/tests/msc3902/federation_room_join_partial_state_test.go
@@ -1030,7 +1030,7 @@ func TestPartialStateJoin(t *testing.T) {
 		psjResult.Server.MustSendTransaction(t, deployment, "hs1", []json.RawMessage{event1.JSON(), event2.JSON()}, nil)
 
 		// wait for the homeserver to persist the event.
-		awaitEventArrival(t, time.Second, alice, serverRoom.RoomID, event2.EventID())
+		awaitEventArrival(t, 5*time.Second, alice, serverRoom.RoomID, event2.EventID())
 
 		// do a gappy sync which only picks up the second message.
 		syncRes, _ := alice.MustSync(t,
@@ -1100,7 +1100,7 @@ func TestPartialStateJoin(t *testing.T) {
 		t.Logf("Derek created event with ID %s", event.EventID())
 
 		// wait for the homeserver to persist the event.
-		awaitEventArrival(t, time.Second, alice, serverRoom.RoomID, event.EventID())
+		awaitEventArrival(t, 2*time.Second, alice, serverRoom.RoomID, event.EventID())
 
 		// do an incremental sync.
 		syncRes, _ := alice.MustSync(t,
@@ -1499,7 +1499,7 @@ func TestPartialStateJoin(t *testing.T) {
 
 		// instead let's just check for the presence of the room in the timeline.
 		// it can take a while for the homeserver to update its state for 100+ events, so raise
 		// the default timeout.
-		alice.SyncUntilTimeout = 20 * time.Second
+		alice.SyncUntilTimeout = 30 * time.Second
 		alice.MustSyncUntil(t, client.SyncReq{},
 			func(clientUserID string, topLevelSyncJSON gjson.Result) error {
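As a usage sketch (assuming dirty runs are enabled in the Complement config), this is how the reuse introduced above behaves from a test's point of view. It relies only on helpers that appear in this diff (NewTestPackage, Deploy, Cleanup, Config.EnableDirtyRuns); the test wiring and the "dirtydemo" namespace string are illustrative assumptions, not part of the patch.

package complement_test

import (
	"testing"

	"github.com/matrix-org/complement"
)

// TestDirtyDeployReuse sketches the intended behaviour: successive Deploy calls in
// dirty mode share one deployment, and asking for more servers only adds the missing ones.
func TestDirtyDeployReuse(t *testing.T) {
	tp, err := complement.NewTestPackage("dirtydemo") // namespace is an arbitrary example
	if err != nil {
		t.Fatalf("NewTestPackage: %s", err)
	}
	defer tp.Cleanup() // the only place the shared dirty deployment is destroyed

	if !tp.Config.EnableDirtyRuns {
		t.Skip("dirty runs are not enabled in this configuration")
	}

	// First call routes to dirtyDeploy() and creates the dirty hs1 container.
	dep1 := tp.Deploy(t, 1)
	// Second call reuses hs1 and only adds hs2 via CreateDirtyServer("hs2"),
	// instead of building a separate two-HS blueprint as before this patch.
	dep2 := tp.Deploy(t, 2)
	_ = dep1
	_ = dep2
}

Because dirtyDeploy() only ever scales the single shared deployment up, `Deploy(t, 1)` followed by `Deploy(t, 2)` keeps at most two homeserver containers running, rather than three containers spread across two isolated blueprint deployments as before.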