From cb45a3f511132925b0ea76ecbcf14e872d4854c7 Mon Sep 17 00:00:00 2001 From: Lawrence Forooghian Date: Thu, 31 Oct 2024 16:35:40 -0300 Subject: [PATCH] Implement RETRY room lifecycle operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the spec referenced in 20f21c7. The RETRY part of this spec was quite unclear, and I asked quite a few questions on the PR to understand it better, so the behaviour implemented here is based on the spec plus Andy’s answers to my questions (I’ve linked to the discussions in the code and / or tests). Recently (i.e. after most of this commit was already implemented, Andy has updated the spec with answers to these questions, but in the interests of not dragging out the current task, I’ll incorporate these updates in #66. The internal triggering of the RETRY operation (as specified by CHA-RL1h3 and CHA-RL4b9) will come in #50. Resolves #51. --- Sources/AblyChat/RoomLifecycleManager.swift | 170 +++++++++- .../DefaultRoomLifecycleManagerTests.swift | 310 +++++++++++++++++- .../MockRoomLifecycleContributorChannel.swift | 18 +- 3 files changed, 478 insertions(+), 20 deletions(-) diff --git a/Sources/AblyChat/RoomLifecycleManager.swift b/Sources/AblyChat/RoomLifecycleManager.swift index a6dc72bc..f950a473 100644 --- a/Sources/AblyChat/RoomLifecycleManager.swift +++ b/Sources/AblyChat/RoomLifecycleManager.swift @@ -181,11 +181,12 @@ internal actor DefaultRoomLifecycleManager UUID { + switch self { + case .attachOperation: + // i.e. generate a new ID + UUID() + case let .retryOperation(id): + // i.e. keep the ID of the existing RETRY operation + id + } + } + } + + // TODO: explain what this is, and what I’ve guessed + private func performAttachmentCycle(trigger: AttachmentCycleTrigger) async throws { // CHA-RL1e - changeStatus(to: .attachingDueToAttachOperation(attachOperationID: operationID)) + changeStatus(to: trigger.attachingStatus) // CHA-RL1f for contributor in contributors { @@ -701,10 +738,20 @@ internal actor DefaultRoomLifecycleManager MockRoomLifecycleContributor { .init( feature: feature, channel: .init( initialState: initialState, attachBehavior: attachBehavior, - detachBehavior: detachBehavior + detachBehavior: detachBehavior, + subscribeToStateBehavior: subscribeToStateBehavior ) ) } @@ -881,6 +883,310 @@ struct DefaultRoomLifecycleManagerTests { #expect(await manager.roomStatus == .released) } + // MARK: - RETRY operation + + // @spec CHA-RL5a - This is actually what’s written in the spec plus Andy’s clarification from https://github.com/ably/specification/pull/200/files#r1794116352 that "the implementation is supposed to be that all channels _except_ the one that entered suspended should be sent to detached" (TODO remove this comment once spec updated) + @Test + func retry_detachesAllContributorsExceptForTriggering() async throws { + // Given: A RoomLifecycleManager + let contributors = [ + createContributor(attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(.init(current: .attached, previous: .attaching, event: .attached, reason: .createUnknownError() /* arbitrary, but non-nil so that the CHA-RL4b1 code doesn’t crash TODO decide right thing to do */ )) /* TODO: so that RETRY completes per CHA-RL5d wait, this is a bit hard to read though */ ), + createContributor(attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, detachBehavior: .success /* so that RETRY completes */ ), + createContributor(attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, detachBehavior: .success /* so that RETRY completes */ ), + ] + + let manager = await createManager(contributors: contributors) + + // When: `performRetryOperation(triggeredByContributor:)` is called on the manager + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: The manager calls `detach` on all contributors except that which triggered the RETRY + #expect(await contributors[0].channel.detachCallCount == 0) + #expect(await contributors[1].channel.detachCallCount == 1) + #expect(await contributors[2].channel.detachCallCount == 1) + } + + // TODO: + // @spec CHA-RL5b - This is actually what’s written in the spec plus Andy’s clarification from https://github.com/ably/specification/pull/200/files#r1794116352 that this means "If the operation above fails because a channel has entered a state other than FAILED" (TODO remove this comment once spec updated) + @Test + func retry_ifDetachFailsDueToNonFailedChannelState_retries() async throws { + // Given: A RoomLifecycleManager, whose contributor at index 1 throws an error upon `detach()` being called on it, and which is in a non-FAILED state after this `detach()` call fails, and on which calling `detach()` succeeds the second time it is called + let detachImpl = { @Sendable (callCount: Int) -> MockRoomLifecycleContributorChannel.AttachOrDetachBehavior in + .complete( + callCount == 1 ? .failure(.createUnknownError() /* arbitrary */ ) + : .success // otherwise, succeed, so that RETRY completes + ) + } + + let contributors = [ + createContributor( + attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, + subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(.init(current: .attached, previous: .attaching, event: .attached, reason: .createUnknownError() /* arbitrary, but non-nil so that the CHA-RL4b1 code doesn’t crash TODO decide right thing to do */ )) /* TODO: so that RETRY completes per CHA-RL5d wait, this is a bit hard to read though */ + ), + createContributor( + // TODO: mimic the completeAndChangeState from elsewhere + initialState: .suspended, // arbitrary non-FAILED state; ideally the contributor would only go into this state _after_ its detach fails, but for the sake of keeping the mock simple let’s do it beforehand and assume that the manager won’t check it beforehand + attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, + detachBehavior: .fromFunction(detachImpl) + ), + createContributor( + attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, + detachBehavior: .success /* so that RETRY completes */ + ), + ] + + let clock = MockSimpleClock() + + let manager = await createManager(contributors: contributors, clock: clock) + + // When: `performRetryOperation(triggeredByContributor:)` is called on the manager, triggered by a contributor that isn’t that at index 1 + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: The manager calls `detach` in sequence on all contributors except that which triggered the RETRY, stopping upon one of these `detach` calls throwing an error, then sleeps for 0.5s, then performs these `detach` calls again + + // (Note that for simplicity of the test I’m not actually making assertions about the sequence in which events happen here) + #expect(await contributors[0].channel.detachCallCount == 0) + #expect(await contributors[1].channel.detachCallCount == 2) + #expect(await contributors[2].channel.detachCallCount == 1) + #expect(await clock.sleepCallArguments == [0.5]) + } + + // @spec CHA-RL5c + @Test + func retry_ifDetachFailsDueToFailedChannelState_transitionsToFailed() async throws { + // Given: A RoomLifecycleManager, whose contributor at index 1 throws an error upon `detach()` being called on it, and which is the FAILED state after this `detach()` call fails + let contributorFailedStateError = ARTErrorInfo.createUnknownError() // arbitrary + + let contributors = [ + createContributor(), + createContributor( + detachBehavior: .completeAndChangeState(.failure(contributorFailedStateError), newState: .failed) + ), + createContributor(), + ] + + let manager = await createManager(contributors: contributors) + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let failedStatusChange = roomStatusSubscription.failedElements().first { _ in true } + + // When: `performRetryOperation(triggeredByContributor:)` is called on the manager, triggered by the contributor at index 0 + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: The manager calls `detach` in sequence on all contributors except that which triggered the RETRY, stopping upon one of these `detach` calls throwing an error, then enters the FAILED status, the associated error for this status being that associated with the contributor’s FAILED status (TODO: clarify which error is meant to be used here; https://github.com/ably/specification/pull/200/files#r1827819961) + #expect(await contributors[0].channel.detachCallCount == 0) + #expect(await contributors[1].channel.detachCallCount == 1) + #expect(await contributors[2].channel.detachCallCount == 0) + + _ = try #require(await failedStatusChange) + #expect(await manager.current == .failed(error: contributorFailedStateError)) + } + + // @spec CHA-RL5d + // @specPartial CHA-RL5f - Tests that the room transitions to ATTACHED, but not any of the side effects of the "attachment cycle" + @Test + func retry_afterDetach_waitsForTriggeringContributorToBecomeAttached() async throws { + // TODO: should this be a separate test or bolted on to the happy-path CHA-RL5a test? + // TODO: what about it already being in that state? + // Given: A RoomLifecycleManager, with a contributor that emits a state change to ATTACHED after subscribeToState() is called on it + let contributorAttachedStateChangeReason = ARTErrorInfo.createUnknownError() + + let contributorAttachedStateChange = ARTChannelStateChange( + current: .attached, + previous: .attaching, // arbitrary + event: .attached, + reason: contributorAttachedStateChangeReason // arbitrary, but non-nil so that the CHA-RL4b1 code doesn’t crash TODO decide right thing to do + ) + + let contributor = createContributor( + attachBehavior: .success /* TODO: best way to handle? this is so that we get through the subsequent CHA-RL5f attachment cycle */, + subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(contributorAttachedStateChange) + ) + + let manager = await createManager( + contributors: [contributor] + ) + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let maybeAttachingStatusChange = roomStatusSubscription.attachingElements().first { _ in true } + + // When: `performRetryOperation(triggeredByContributor:)` is called on the manager, triggered by the aforementioned contributor + // TODO: this is not a great test, and there’s no easy way to check that it would do something else if it were a non-ATTACHED; let’s perhaps do that in the test for FAILED + await manager.performRetryOperation(triggeredByContributor: contributor) + + // Then: The room transitions to ATTACHING, and the RETRY operation completes + let attachingStatusChange = try #require(await maybeAttachingStatusChange) + // TODO: check this one — spec isn’t clear + #expect(attachingStatusChange.error == nil) + } + + // @spec CHA-RL5e + @Test + func retry_afterDetach_whenTriggeringContributorBecomesFailed() async throws { + // TODO: what about it already being in that state? + // Given: A RoomLifecycleManager, with a contributor that emits a state change to FAILED after subscribeToState() is called on it + let contributorFailedStateError = ARTErrorInfo.createUnknownError() // arbitrary + let contributorFailedStateChange = ARTChannelStateChange( + current: .failed, + previous: .attaching, // arbitrary + event: .failed, + reason: contributorFailedStateError + ) + + let contributor = createContributor( + subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(contributorFailedStateChange) + ) + + let manager = await createManager( + // TODO: figure out right thing to do here (should RETRY be the one who transitions us to SUSPENDED?) — this is currently just there so that the manager believes it has an operation in progress and hence doesn’t try to detach contributors, causing a crash + forTestingWhatHappensWhenCurrentlyIn: .suspended(retryOperationID: UUID(), error: .createUnknownError()), + contributors: [contributor] + ) + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let failedStatusChange = roomStatusSubscription.failedElements().first { _ in true } + + // When: `performRetryOperation(triggeredByContributor:)` is called on the manager, triggered by the aforementioned contributor + await manager.performRetryOperation(triggeredByContributor: contributor) + + // Then: The RETRY operation completes and the room enters the FAILED status, the associated error for this status being that associated with the contributor’s FAILED state change (TODO: clarify which error is meant to be used here; https://github.com/ably/specification/pull/200/files#r1829344042) + _ = try #require(await failedStatusChange) + #expect(await manager.current == .failed(error: contributorFailedStateError)) + } + + // @spec CHA-RL5f + // @spec CHA-RL5f1 - TODO all of CHA-RL5 is kind of best-of-ability guesses + @Test + func retry_attachment_success() async throws { + // TODO: DRY up and tidy up these tests, and figure out their relation to the ATTACH operation + // the below is, I think, the setup needed to slide us into the CHA-RL5 cases + let attachedStateChange = ARTChannelStateChange(current: .attached, previous: .attaching, event: .attached, reason: .createUnknownError()) + + let contributors = [ + createContributor(attachBehavior: .success, subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(attachedStateChange)), + createContributor(attachBehavior: .success, detachBehavior: .complete(.success)), + createContributor(attachBehavior: .success, detachBehavior: .complete(.success)), + ] + + let manager = await createManager( + forTestingWhatHappensWhenCurrentlyIn: .suspended(retryOperationID: UUID(), error: .createUnknownError()), // again, this is for just disabling the lifecycle stuff + contributors: contributors + ) + + // now, we need to set up the per-test behaviour — actually, we have to do it above, because that’s where we define the attach behaviour (so let’s try and parameterise that then after) + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let attachedStatusChange = roomStatusSubscription.first { $0.current == .attached } + + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: all contributors get attached, we transition to ATTACHED, and RETRY op completes + // TODO: discontinuity errors are broadcast to subscribers — will fill this in after + for contributor in contributors { + #expect(await contributor.channel.attachCallCount == 1) + } + + _ = try #require(await attachedStatusChange) + } + + // @spec CHA-RL5f2 + @Test + func retry_attachment_failed() async throws { + // the below is, I think, the setup needed to slide us into the CHA-RL5 cases + let attachedStateChange = ARTChannelStateChange(current: .attached, previous: .attaching, event: .attached, reason: .createUnknownError()) + + let failedError = ARTErrorInfo.createUnknownError() + + let contributors = [ + createContributor(attachBehavior: .success, subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(attachedStateChange)), + createContributor(attachBehavior: .completeAndChangeState(.failure(failedError), newState: .failed), detachBehavior: .complete(.success)), + createContributor(attachBehavior: .success, detachBehavior: .complete(.success)), + ] + + let manager = await createManager( + forTestingWhatHappensWhenCurrentlyIn: .suspended(retryOperationID: UUID(), error: .createUnknownError()), // again, this is for just disabling the lifecycle stuff + contributors: contributors + ) + + // now, we need to set up the per-test behaviour — actually, we have to do it above, because that’s where we define the attach behaviour (so let’s try and parameterise that then after) + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let maybeFailedStatusChange = roomStatusSubscription.failedElements().first { _ in true } + + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: all contributors get attached until the one that fails, we transition to FAILED, and RETRY op completes + #expect(await contributors[0].channel.attachCallCount == 1) + #expect(await contributors[1].channel.attachCallCount == 1) + #expect(await contributors[2].channel.attachCallCount == 0) + + let failedStatusChange = try #require(await maybeFailedStatusChange) + // TODO: confirm + #expect(isChatError(failedStatusChange.error, withCode: .messagesAttachmentFailed, cause: failedError)) + } + + // @spec CHA-RL5f3 + @Test + func retry_attachment_suspended() async throws { + // the below is, I think, the setup needed to slide us into the CHA-RL5 cases + let attachedStateChange = ARTChannelStateChange(current: .attached, previous: .attaching, event: .attached, reason: .createUnknownError()) + + let suspendedError = ARTErrorInfo.createUnknownError() + + let contributor1AttachImpl = { @Sendable (callCount: Int) -> MockRoomLifecycleContributorChannel.AttachOrDetachBehavior in + if callCount == 1 { + .completeAndChangeState(.failure(suspendedError), newState: .suspended) + } else { + .complete(.success) + } + } + + let contributors = [ + createContributor(attachBehavior: .success, detachBehavior: .complete(.success), subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(attachedStateChange)), + createContributor(attachBehavior: .fromFunction(contributor1AttachImpl), detachBehavior: .complete(.success), subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(attachedStateChange)), + createContributor(attachBehavior: .success, detachBehavior: .complete(.success)), + ] + + let manager = await createManager( + forTestingWhatHappensWhenCurrentlyIn: .suspended(retryOperationID: UUID(), error: .createUnknownError()), // again, this is for just disabling the lifecycle stuff + contributors: contributors + ) + + // now, we need to set up the per-test behaviour — actually, we have to do it above, because that’s where we define the attach behaviour (so let’s try and parameterise that then after) + + // TODO: to note: the contributor1AttachBehavior here is so that the first "attachment cycle" fails and the second succeeds + // TODO: to note: the contributor 0 detachBehavior is because we also anticipate that one being detached + // TODO: to note: the contributor 1 subscribeToStateBehavior is so that the second RETRY proceeds + + let roomStatusSubscription = await manager.onChange(bufferingPolicy: .unbounded) + async let maybeFirstFourRoomStatusChanges = roomStatusSubscription.prefix(4) + + await manager.performRetryOperation(triggeredByContributor: contributors[0]) + + // Then: all contributors get attached until the one that fails, we transition to SUSPENDED, and RETRY happens again, the second time round the attachment cycle succeeds and we transition to ATTACHED per CHA-RL5f + // i.e. we get ATTACHING (first attachment cycle), SUSPENDED (because attachment cycle failed), ATTACHING (second attachment cycle), ATTACHED + let firstFourRoomStatusChanges = await Array(maybeFirstFourRoomStatusChanges) + + // TODO: what error for ATTACHING? + #expect(firstFourRoomStatusChanges[0].current == .attaching(error: nil)) + + let suspendedStatusChange = try #require(Subscription.StatusChangeWithError(maybeSuspendedStatusChange: firstFourRoomStatusChanges[1])) + // TODO: confirm + #expect(isChatError(suspendedStatusChange.error, withCode: .messagesAttachmentFailed, cause: suspendedError)) + + #expect(firstFourRoomStatusChanges[2].current == .attaching(error: nil)) + + #expect(firstFourRoomStatusChanges[3].current == .attached) + + #expect(await contributors[0].channel.attachCallCount == 2) // because of first and second attachment cycle + #expect(await contributors[1].channel.attachCallCount == 2) // because of first and second attachment cycle + #expect(await contributors[2].channel.attachCallCount == 1) // because of second attachment cycle + + #expect(await contributors[0].channel.detachCallCount == 1) // because of first RETRY + #expect(await contributors[1].channel.detachCallCount == 1) // because of second RETRY + #expect(await contributors[2].channel.detachCallCount == 2) // because of first and second RETRY + } + // MARK: - Handling contributor UPDATE events // @spec CHA-RL4a1 diff --git a/Tests/AblyChatTests/Mocks/MockRoomLifecycleContributorChannel.swift b/Tests/AblyChatTests/Mocks/MockRoomLifecycleContributorChannel.swift index 28f6723e..b363f875 100644 --- a/Tests/AblyChatTests/Mocks/MockRoomLifecycleContributorChannel.swift +++ b/Tests/AblyChatTests/Mocks/MockRoomLifecycleContributorChannel.swift @@ -4,6 +4,7 @@ final actor MockRoomLifecycleContributorChannel: RoomLifecycleContributorChannel { private let attachBehavior: AttachOrDetachBehavior? private let detachBehavior: AttachOrDetachBehavior? + private let subscribeToStateBehavior: SubscribeToStateBehavior var state: ARTRealtimeChannelState var errorReason: ARTErrorInfo? @@ -16,11 +17,13 @@ final actor MockRoomLifecycleContributorChannel: RoomLifecycleContributorChannel init( initialState: ARTRealtimeChannelState, attachBehavior: AttachOrDetachBehavior?, - detachBehavior: AttachOrDetachBehavior? + detachBehavior: AttachOrDetachBehavior?, + subscribeToStateBehavior: SubscribeToStateBehavior? ) { state = initialState self.attachBehavior = attachBehavior self.detachBehavior = detachBehavior + self.subscribeToStateBehavior = subscribeToStateBehavior ?? .justAddSubscription } enum AttachOrDetachResult { @@ -52,6 +55,11 @@ final actor MockRoomLifecycleContributorChannel: RoomLifecycleContributorChannel } } + enum SubscribeToStateBehavior { + case justAddSubscription + case addSubscriptionAndEmitStateChange(ARTChannelStateChange) + } + func attach() async throws(ARTErrorInfo) { attachCallCount += 1 @@ -100,6 +108,14 @@ final actor MockRoomLifecycleContributorChannel: RoomLifecycleContributorChannel func subscribeToState() -> Subscription { let subscription = Subscription(bufferingPolicy: .unbounded) subscriptions.append(subscription) + + switch subscribeToStateBehavior { + case .justAddSubscription: + break + case let .addSubscriptionAndEmitStateChange(stateChange): + emitStateChange(stateChange) + } + return subscription }