Make dirty runs dirtier (#683)
* Make dirty runs dirtier

Previously, `Deploy(t, 1)` would deploy 1 HS in 1 isolated blueprint,
which could be reused by other tests that also call `Deploy(t, 1)`. This
pattern extended to `Deploy(t, 2)`, which would deploy 2 HSes in another
isolated blueprint, so dirty runs ended up keeping more containers running
in parallel. What's worse: these containers would only be cleaned up at
the end of the entire test package run, so `defer deployment.Destroy(t)`
would do nothing.

This was Very Bad on GHA boxes as they have limited resources, causing
synapse/workers to fail to complete. (A minimal sketch of the test-side
pattern follows the change list below.)

* Increase timeouts for GHA boxes

* Add more timeouts for GHA boxes

* More increased timeouts for GHA
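
For readers unfamiliar with the pattern described above, here is a minimal test-side sketch, assuming the usual package-level complement.Deploy helper that forwards to TestPackage.Deploy; the test name, package placement and body are hypothetical, and only Deploy and Destroy come from this codebase.

package tests

import (
    "testing"

    "github.com/matrix-org/complement"
)

func TestSomethingFederated(t *testing.T) {
    // With dirty runs enabled, this now returns the single shared dirty deployment,
    // scaled up to at least 2 homeservers, instead of a fresh 2-HS blueprint deployment.
    deployment := complement.Deploy(t, 2)
    // For dirty deployments this is effectively a no-op: containers are torn down once,
    // via TestPackage.Cleanup(), at the end of the whole package run.
    defer deployment.Destroy(t)

    // ... drive hs1 and hs2 ...
}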
kegsay authored Oct 20, 2023
1 parent fdfbde6 commit fe4a38f
Showing 4 changed files with 107 additions and 27 deletions.
48 changes: 48 additions & 0 deletions internal/docker/deployer.go
@@ -76,6 +76,54 @@ func (d *Deployer) log(str string, args ...interface{}) {
log.Printf(str, args...)
}

// CreateDirtyServer creates a new dirty homeserver on the dirty network, creating the network if needed.
// The returned homeserver should be added to the dirty deployment. hsName should start at 'hs1', then
// 'hs2' ... 'hsN'.
func (d *Deployer) CreateDirtyServer(hsName string) (*HomeserverDeployment, error) {
networkName, err := createNetworkIfNotExists(d.Docker, d.config.PackageNamespace, "dirty")
if err != nil {
return nil, fmt.Errorf("CreateDirtyServer: %w", err)
}
baseImageURI := d.config.BaseImageURI
// Use HS specific base image if defined
if uri, ok := d.config.BaseImageURIs[hsName]; ok {
baseImageURI = uri
}

hsDeployment, err := deployImage(
d.Docker, baseImageURI, fmt.Sprintf("complement_%s_dirty_%s", d.config.PackageNamespace, hsName),
d.config.PackageNamespace, "", hsName, nil, "dirty",
networkName, d.config,
)
if err != nil {
if hsDeployment != nil && hsDeployment.ContainerID != "" {
// print logs to help debug
printLogs(d.Docker, hsDeployment.ContainerID, "dirty")
}
return nil, fmt.Errorf("CreateDirtyServer: Failed to deploy image %v : %w", baseImageURI, err)
}
return hsDeployment, nil
}

// CreateDirtyDeployment creates a clean HS without any blueprints. More HSes can be added later via
// CreateDirtyServer()
func (d *Deployer) CreateDirtyDeployment() (*Deployment, error) {
hsName := "hs1"
hsDeployment, err := d.CreateDirtyServer(hsName)
if err != nil {
return nil, err
}
// assign the HS to the deployment
return &Deployment{
Deployer: d,
Dirty: true,
HS: map[string]*HomeserverDeployment{
hsName: hsDeployment,
},
Config: d.config,
}, nil
}

func (d *Deployer) Deploy(ctx context.Context, blueprintName string) (*Deployment, error) {
dep := &Deployment{
Deployer: d,
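
To make the new deployer API above concrete, here is a hedged usage sketch; it is not part of the diff, error handling mirrors the style used elsewhere, and cfg / t are assumed to be the usual Complement config and test handle.

// Illustrative only: building and growing a dirty deployment by hand with the new API.
d, err := docker.NewDeployer("dirty", cfg)
if err != nil {
    t.Fatalf("NewDeployer: %s", err)
}
// Boots "hs1" on the shared dirty network and wraps it in a Deployment with Dirty: true.
dep, err := d.CreateDirtyDeployment()
if err != nil {
    t.Fatalf("CreateDirtyDeployment: %s", err)
}
// Later, scale the same deployment up on demand; names continue as "hs2", "hs3", ...
hs2, err := d.CreateDirtyServer("hs2")
if err != nil {
    t.Fatalf("CreateDirtyServer hs2: %s", err)
}
dep.HS["hs2"] = hs2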
78 changes: 55 additions & 23 deletions test_package.go
@@ -53,8 +53,8 @@ type TestPackage struct {

// pointers to existing deployments for Deploy(t, 1) style deployments which are reused when run
// in dirty mode.
- existingDeployments map[int]*docker.Deployment
- existingDeploymentsMu *sync.Mutex
+ existingDeployment *docker.Deployment
+ existingDeploymentMu *sync.Mutex
}

// NewTestPackage creates a new test package which can be used to deploy containers for all tests
@@ -75,21 +75,20 @@ func NewTestPackage(pkgNamespace string) (*TestPackage, error) {
logrus.SetLevel(logrus.ErrorLevel)

return &TestPackage{
- complementBuilder: builder,
- namespaceCounter: 0,
- Config: cfg,
- existingDeployments: make(map[int]*docker.Deployment),
- existingDeploymentsMu: &sync.Mutex{},
+ complementBuilder: builder,
+ namespaceCounter: 0,
+ Config: cfg,
+ existingDeploymentMu: &sync.Mutex{},
}, nil
}

func (tp *TestPackage) Cleanup() {
// any dirty deployments need logs printed and post scripts run
- tp.existingDeploymentsMu.Lock()
- for _, dep := range tp.existingDeployments {
- dep.DestroyAtCleanup()
+ tp.existingDeploymentMu.Lock()
+ if tp.existingDeployment != nil {
+ tp.existingDeployment.DestroyAtCleanup()
}
- tp.existingDeploymentsMu.Unlock()
+ tp.existingDeploymentMu.Unlock()
tp.complementBuilder.Cleanup()
}

@@ -120,13 +119,9 @@ func (tp *TestPackage) OldDeploy(t *testing.T, blueprint b.Blueprint) Deployment
func (tp *TestPackage) Deploy(t *testing.T, numServers int) Deployment {
t.Helper()
if tp.Config.EnableDirtyRuns {
- tp.existingDeploymentsMu.Lock()
- existingDep := tp.existingDeployments[numServers]
- tp.existingDeploymentsMu.Unlock()
- if existingDep != nil {
- return existingDep
- }
+ return tp.dirtyDeploy(t, numServers)
}
+ // non-dirty deployments below
blueprint := mapServersToBlueprint(numServers)
timeStartBlueprint := time.Now()
if err := tp.complementBuilder.ConstructBlueprintIfNotExist(blueprint); err != nil {
@@ -143,15 +138,52 @@ func (tp *TestPackage) Deploy(t *testing.T, numServers int) Deployment {
t.Fatalf("Deploy: Deploy returned error %s", err)
}
t.Logf("Deploy times: %v blueprints, %v containers", timeStartDeploy.Sub(timeStartBlueprint), time.Since(timeStartDeploy))
- if tp.Config.EnableDirtyRuns {
- dep.Dirty = true // stop this deployment being destroyed.
- tp.existingDeploymentsMu.Lock()
- tp.existingDeployments[numServers] = dep
- tp.existingDeploymentsMu.Unlock()
- }
return dep
}

func (tp *TestPackage) dirtyDeploy(t *testing.T, numServers int) Deployment {
tp.existingDeploymentMu.Lock()
defer tp.existingDeploymentMu.Unlock()
// do we even have a deployment?
if tp.existingDeployment == nil {
d, err := docker.NewDeployer("dirty", tp.complementBuilder.Config)
if err != nil {
t.Fatalf("dirtyDeploy: NewDeployer returned error %s", err)
}
// this creates a single hs1
tp.existingDeployment, err = d.CreateDirtyDeployment()
if err != nil {
t.Fatalf("CreateDirtyDeployment failed: %s", err)
}
}

// if we have an existing deployment, can we use it? We can use it if we have at least that number of servers deployed already.
if len(tp.existingDeployment.HS) >= numServers {
return tp.existingDeployment
}

// we need to scale up the dirty deployment to more servers
d, err := docker.NewDeployer("dirty", tp.complementBuilder.Config)
if err != nil {
t.Fatalf("dirtyDeploy: NewDeployer returned error %s", err)
}
for i := 1; i <= numServers; i++ {
hsName := fmt.Sprintf("hs%d", i)
_, ok := tp.existingDeployment.HS[hsName]
if ok {
continue
}
// scale up
hsDep, err := d.CreateDirtyServer(hsName)
if err != nil {
t.Fatalf("dirtyDeploy: failed to add %s: %s", hsName, err)
}
tp.existingDeployment.HS[hsName] = hsDep
}

return tp.existingDeployment
}

// converts the requested number of servers into a single blueprint, which can be deployed using normal blueprint machinery.
func mapServersToBlueprint(numServers int) b.Blueprint {
servers := make([]b.Homeserver, numServers)
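
Putting dirtyDeploy together, here is a hedged illustration of how the single shared deployment now evolves across one dirty package run; tp stands for the package's TestPackage, the test itself is hypothetical, and every method called here appears in this diff.

func TestDirtyScaling(t *testing.T) {
    // First dirty call: dirtyDeploy builds a "dirty" Deployer and CreateDirtyDeployment boots hs1.
    depA := tp.Deploy(t, 1)
    // Only hs1 exists but 3 servers are requested, so CreateDirtyServer adds hs2 and hs3.
    depB := tp.Deploy(t, 3)
    // Three servers are already deployed, which covers a request for 2: the existing deployment is returned untouched.
    depC := tp.Deploy(t, 2)
    // depA, depB and depC are all the same shared tp.existingDeployment;
    // it is destroyed exactly once, via DestroyAtCleanup in tp.Cleanup().
    _, _, _ = depA, depB, depC
}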
2 changes: 1 addition & 1 deletion tests/csapi/apidoc_room_state_test.go
@@ -108,7 +108,7 @@ func TestRoomState(t *testing.T) {
})

authedClient.MustDo(t, "GET", []string{"_matrix", "client", "v3", "publicRooms"},
- client.WithRetryUntil(time.Second, func(res *http.Response) bool {
+ client.WithRetryUntil(3*time.Second, func(res *http.Response) bool {
foundRoom := false

must.MatchResponse(t, res, match.HTTPResponse{
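
The only change in this test is the retry budget, raised from 1s to 3s for slower GHA runners. For context, the call shape is repeated below with an illustrative callback body; the real test inspects the room list via must.MatchResponse, and returning false from the callback presumably retries the request until the time budget is spent.

// Illustrative call shape only, mirroring the diff; the status-code check is a stand-in.
authedClient.MustDo(t, "GET", []string{"_matrix", "client", "v3", "publicRooms"},
    client.WithRetryUntil(3*time.Second, func(res *http.Response) bool {
        // Return true once the response looks right; returning false retries
        // the request until the 3-second budget is exhausted.
        return res.StatusCode == 200
    }),
)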
6 changes: 3 additions & 3 deletions tests/msc3902/federation_room_join_partial_state_test.go
@@ -1030,7 +1030,7 @@ func TestPartialStateJoin(t *testing.T) {
psjResult.Server.MustSendTransaction(t, deployment, "hs1", []json.RawMessage{event1.JSON(), event2.JSON()}, nil)

// wait for the homeserver to persist the event.
- awaitEventArrival(t, time.Second, alice, serverRoom.RoomID, event2.EventID())
+ awaitEventArrival(t, 5*time.Second, alice, serverRoom.RoomID, event2.EventID())

// do a gappy sync which only picks up the second message.
syncRes, _ := alice.MustSync(t,
@@ -1100,7 +1100,7 @@ func TestPartialStateJoin(t *testing.T) {
t.Logf("Derek created event with ID %s", event.EventID())

// wait for the homeserver to persist the event.
- awaitEventArrival(t, time.Second, alice, serverRoom.RoomID, event.EventID())
+ awaitEventArrival(t, 2*time.Second, alice, serverRoom.RoomID, event.EventID())

// do an incremental sync.
syncRes, _ := alice.MustSync(t,
@@ -1499,7 +1499,7 @@ func TestPartialStateJoin(t *testing.T) {
// instead let's just check for the presence of the room in the timeline.
// it can take a while for the homeserver to update its state for 100+ events, so raise
// the default timeout.
- alice.SyncUntilTimeout = 20 * time.Second
+ alice.SyncUntilTimeout = 30 * time.Second
alice.MustSyncUntil(t,
client.SyncReq{},
func(clientUserID string, topLevelSyncJSON gjson.Result) error {
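
These partial-state changes likewise only raise timeouts: awaitEventArrival gets a longer deadline, and alice.SyncUntilTimeout, which bounds how long MustSyncUntil keeps polling /sync before failing the test, goes from 20s to 30s. A hedged sketch of that last shape, with an illustrative checker body (the real checker inspects the room timeline):

alice.SyncUntilTimeout = 30 * time.Second // give slow GHA boxes more time to catch up
alice.MustSyncUntil(t,
    client.SyncReq{},
    func(clientUserID string, topLevelSyncJSON gjson.Result) error {
        // Returning an error keeps MustSyncUntil polling; returning nil ends the wait.
        if !topLevelSyncJSON.Get("rooms.join").Exists() {
            return fmt.Errorf("no joined rooms in sync response yet")
        }
        return nil
    },
)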
