Trigger RETRY operation internally
TODO: tests, and test that RETRY can lead to another RETRY

TODO: the new test is crashing sometimes
TODO: Test that

TODO: figure out where to do the other part of the out-of-band work task (the one that’s not RETRY)

Resolves #50.
lawrence-forooghian committed Nov 18, 2024
1 parent edd0f4d commit 6bd7e60
Showing 2 changed files with 103 additions and 12 deletions.
41 changes: 37 additions & 4 deletions Sources/AblyChat/RoomLifecycleManager.swift
@@ -188,7 +188,7 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
case detaching(detachOperationID: UUID)
case detached
case detachedDueToRetryOperation(retryOperationID: UUID)
case suspendedAwaitingStartOfRetryOperation(error: ARTErrorInfo)
case suspendedAwaitingStartOfRetryOperation(retryOperationTask: Task<Void, Never>, error: ARTErrorInfo)
case suspended(retryOperationID: UUID, error: ARTErrorInfo)
case failed(error: ARTErrorInfo)
case releasing(releaseOperationID: UUID)
@@ -210,7 +210,7 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
.detaching
case .detached, .detachedDueToRetryOperation:
.detached
case let .suspendedAwaitingStartOfRetryOperation(error):
case let .suspendedAwaitingStartOfRetryOperation(_, error):
.suspended(error: error)
case let .suspended(_, error):
.suspended(error: error)
@@ -509,7 +509,14 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor

clearTransientDisconnectTimeouts()

changeStatus(to: .suspendedAwaitingStartOfRetryOperation(error: reason))
// TODO: Does it matter that this task is created before the status change below? That is, could the two SUSPENDED statuses be emitted out of order? I think not, given that the intervening code is synchronous; add a comment explaining this.
let retryOperationTask = scheduleAnOperation(
kind: .retry(
triggeringContributor: contributor,
errorForSuspendedStatus: reason
)
)
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(retryOperationTask: retryOperationTask, error: reason))
}
case .attaching:
if !hasOperationInProgress, !contributorAnnotations[contributor].hasTransientDisconnectTimeout {
Expand Down Expand Up @@ -719,6 +726,25 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
try result.get()
}

/// The kinds of operation that you can schedule using ``scheduleAnOperation(kind:)``.
private enum OperationKind {
/// The RETRY operation.
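///
/// - `triggeringContributor`: The contributor whose transition to SUSPENDED triggered the RETRY.
/// - `errorForSuspendedStatus`: The error attached to the SUSPENDED room status that accompanies this RETRY.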
case retry(triggeringContributor: Contributor, errorForSuspendedStatus: ARTErrorInfo)
}

// TODO: logging
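/// Creates a `Task` that asynchronously performs the operation described by `kind`.
///
/// - Returns: The created `Task`. Callers can store this task (for example, inside a room status case) and `await` its value to wait for the scheduled operation to complete.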
private func scheduleAnOperation(kind: OperationKind) -> Task<Void, Never> {
Task {
switch kind {
case let .retry(triggeringContributor, errorForSuspendedStatus):
await performRetryOperation(
triggeredByContributor: triggeringContributor,
errorForSuspendedStatus: errorForSuspendedStatus
)
}
}
}

// MARK: - ATTACH operation

internal func performAttachOperation() async throws {
Expand Down Expand Up @@ -781,9 +807,16 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
case .suspended:
// CHA-RL1h2
let error = ARTErrorInfo(chatError: .attachmentFailed(feature: contributor.feature, underlyingError: contributorAttachError))
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(error: error))

// CHA-RL1h3
// TODO: Does it matter that this task is created before the status change below? That is, could the two SUSPENDED statuses be emitted out of order? I think not, given that the intervening code is synchronous; add a comment explaining this.
let retryOperationTask = scheduleAnOperation(
kind: .retry(
triggeringContributor: contributor,
errorForSuspendedStatus: error
)
)
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(retryOperationTask: retryOperationTask, error: error))
throw error
case .failed:
// CHA-RL1h4
74 changes: 66 additions & 8 deletions Tests/AblyChatTests/DefaultRoomLifecycleManagerTests.swift
@@ -310,23 +310,51 @@ struct DefaultRoomLifecycleManagerTests {

// @spec CHA-RL1h2
// @specOneOf(1/2) CHA-RL1h1 - tests that an error gets thrown when channel attach fails due to entering SUSPENDED (TODO: but I don’t yet fully understand the meaning of CHA-RL1h1; outstanding question https://github.com/ably/specification/pull/200/files#r1765476610)
// @specPartial CHA-RL1h3 - Have tested the failure of the operation and the error that’s thrown. Have not yet implemented the "enter the recovery loop" (TODO: https://github.com/ably-labs/ably-chat-swift/issues/50)
// @spec CHA-RL1h3
@Test
func attach_whenContributorFailsToAttachAndEntersSuspended_transitionsToSuspended() async throws {
func attach_whenContributorFailsToAttachAndEntersSuspended_transitionsToSuspendedAndPerformsRetryOperation() async throws {
// Given: A DefaultRoomLifecycleManager, one of whose contributors’ call to `attach` fails causing it to enter the SUSPENDED status
let contributorAttachError = ARTErrorInfo(domain: "SomeDomain", code: 123)
var nonFailingContributorsDetachOperations: [SignallableChannelOperation] = []
let contributors = (1 ... 3).map { i in
if i == 1 {
createContributor(attachBehavior: .completeAndChangeState(.failure(contributorAttachError), newState: .suspended))
return createContributor(
attachBehavior: .fromFunction { callCount in
if callCount == 1 {
.completeAndChangeState(.failure(contributorAttachError), newState: .suspended) // The behaviour described above
} else {
.complete(.success) // So the CHA-RL5f RETRY attachment cycle succeeds
}
},

// This is so that the RETRY operation’s wait-to-become-ATTACHED succeeds
subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(
.init(
current: .attached,
previous: .detached, // arbitrary
event: .attached,
reason: nil // arbitrary
)
)
)
} else {
createContributor(attachBehavior: .success)
// These contributors will be detached by the RETRY operation. We want to be able to delay the completion of those detach calls until we have verified that the room’s status is SUSPENDED; that is, we want to defer the RETRY operation’s transition to DETACHED.
let detachOperation = SignallableChannelOperation()
nonFailingContributorsDetachOperations.append(detachOperation)

return createContributor(
attachBehavior: .success, // So the CHA-RL5f RETRY attachment cycle succeeds
detachBehavior: detachOperation.behavior
)
}
}

let manager = await createManager(contributors: contributors)

let statusChangeSubscription = await manager.onRoomStatusChange(bufferingPolicy: .unbounded)
async let maybeSuspendedStatusChange = statusChangeSubscription.suspendedElements().first { _ in true }
let roomStatusChangeSubscription = await manager.onRoomStatusChange(bufferingPolicy: .unbounded)
async let maybeSuspendedRoomStatusChange = roomStatusChangeSubscription.suspendedElements().first { _ in true }

let managerStatusChangeSubscription = await manager.testsOnly_onStatusChange()

// When: `performAttachOperation()` is called on the lifecycle manager
async let roomAttachResult: Void = manager.performAttachOperation()
@@ -336,7 +364,7 @@ struct DefaultRoomLifecycleManagerTests {
// 1. the room status transitions to SUSPENDED, with the status change’s `error` having the AttachmentFailed code corresponding to the feature of the failed contributor, `cause` equal to the error thrown by the contributor `attach` call
// 2. the manager’s `error` is set to this same error
// 3. the room attach operation fails with this same error
let suspendedStatusChange = try #require(await maybeSuspendedStatusChange)
let suspendedRoomStatusChange = try #require(await maybeSuspendedRoomStatusChange)

#expect(await manager.roomStatus.isSuspended)

@@ -347,9 +375,39 @@ struct DefaultRoomLifecycleManagerTests {
roomAttachError = error
}

for error in await [suspendedStatusChange.error, manager.roomStatus.error, roomAttachError] {
for error in await [suspendedRoomStatusChange.error, manager.roomStatus.error, roomAttachError] {
#expect(isChatError(error, withCode: .messagesAttachmentFailed, cause: contributorAttachError))
}

// and:
// 4. The manager asynchronously performs a RETRY operation

// Allow the RETRY operation’s contributor detaches to complete
for detachOperation in nonFailingContributorsDetachOperations {
detachOperation.complete(behavior: .success) // So the CHA-RL5a RETRY detachment cycle succeeds
}

// Wait for the RETRY to finish
let retryOperationTask = try #require(await managerStatusChangeSubscription.compactMap { statusChange in
if case let .suspendedAwaitingStartOfRetryOperation(retryOperationTask, _) = statusChange.current {
return retryOperationTask
}
return nil
}
.first { _ in
true
})

await retryOperationTask.value

// We confirm that a RETRY happened by checking for its expected side effects:
#expect(await contributors[0].channel.detachCallCount == 0) // RETRY doesn’t touch this since it’s the one that triggered the RETRY
#expect(await contributors[1].channel.detachCallCount == 1) // From the CHA-RL5a RETRY detachment cycle
#expect(await contributors[2].channel.detachCallCount == 1) // From the CHA-RL5a RETRY detachment cycle

#expect(await contributors[0].channel.attachCallCount == 2) // From the ATTACH operation and the CHA-RL5f RETRY attachment cycle
#expect(await contributors[1].channel.attachCallCount == 1) // From the CHA-RL5f RETRY attachment cycle
#expect(await contributors[2].channel.attachCallCount == 1) // From the CHA-RL5f RETRY attachment cycle
}

// @specOneOf(2/2) CHA-RL1h1 - tests that an error gets thrown when channel attach fails due to entering FAILED (TODO: but I don’t yet fully understand the meaning of CHA-RL1h1; outstanding question https://github.com/ably/specification/pull/200/files#r1765476610))
