Trigger RETRY operation internally
TODO: tests, and test that RETRY can lead to another RETRY

TODO: the new test is crashing sometimes
TODO: Test that

TODO: figure out where to do the other part of the out-of-band work task (the one that’s not RETRY)

Resolves #50.
lawrence-forooghian committed Nov 18, 2024
1 parent edd0f4d commit 6bd7e60
Showing 2 changed files with 103 additions and 12 deletions.
41 changes: 37 additions & 4 deletions Sources/AblyChat/RoomLifecycleManager.swift
@@ -188,7 +188,7 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
case detaching(detachOperationID: UUID)
case detached
case detachedDueToRetryOperation(retryOperationID: UUID)
case suspendedAwaitingStartOfRetryOperation(error: ARTErrorInfo)
case suspendedAwaitingStartOfRetryOperation(retryOperationTask: Task<Void, Never>, error: ARTErrorInfo)
case suspended(retryOperationID: UUID, error: ARTErrorInfo)
case failed(error: ARTErrorInfo)
case releasing(releaseOperationID: UUID)
@@ -210,7 +210,7 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
.detaching
case .detached, .detachedDueToRetryOperation:
.detached
case let .suspendedAwaitingStartOfRetryOperation(error):
case let .suspendedAwaitingStartOfRetryOperation(_, error):
.suspended(error: error)
case let .suspended(_, error):
.suspended(error: error)
@@ -509,7 +509,14 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor

clearTransientDisconnectTimeouts()

changeStatus(to: .suspendedAwaitingStartOfRetryOperation(error: reason))
// TODO: Does it matter that this task is created before the status change below? That is, could the two SUSPENDED statuses be emitted out of order? I think not, given that the intervening code is synchronous; add a comment explaining this.
let retryOperationTask = scheduleAnOperation(
kind: .retry(
triggeringContributor: contributor,
errorForSuspendedStatus: reason
)
)
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(retryOperationTask: retryOperationTask, error: reason))
}
case .attaching:
if !hasOperationInProgress, !contributorAnnotations[contributor].hasTransientDisconnectTimeout {
Expand Down Expand Up @@ -719,6 +726,25 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
try result.get()
}

/// The kinds of operation that you can schedule using ``scheduleAnOperation(kind:)``.
private enum OperationKind {
/// The RETRY operation.
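///
/// - `triggeringContributor`: The contributor whose transition to SUSPENDED triggered the RETRY.
/// - `errorForSuspendedStatus`: The error attached to the SUSPENDED room status that accompanies this RETRY.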
case retry(triggeringContributor: Contributor, errorForSuspendedStatus: ARTErrorInfo)
}

// TODO: logging
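/// Creates a `Task` that asynchronously performs the operation described by `kind`.
///
/// - Returns: The created `Task`. Callers can store this task (for example, inside a room status case) and `await` its value to wait for the scheduled operation to complete.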
private func scheduleAnOperation(kind: OperationKind) -> Task<Void, Never> {
Task {
switch kind {
case let .retry(triggeringContributor, errorForSuspendedStatus):
await performRetryOperation(
triggeredByContributor: triggeringContributor,
errorForSuspendedStatus: errorForSuspendedStatus
)
}
}
}

// MARK: - ATTACH operation

internal func performAttachOperation() async throws {
Expand Down Expand Up @@ -781,9 +807,16 @@ internal actor DefaultRoomLifecycleManager<Contributor: RoomLifecycleContributor
case .suspended:
// CHA-RL1h2
let error = ARTErrorInfo(chatError: .attachmentFailed(feature: contributor.feature, underlyingError: contributorAttachError))
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(error: error))

// CHA-RL1h3
// TODO: Does it matter that this task is created before the status change below? That is, could the two SUSPENDED statuses be emitted out of order? I think not, given that the intervening code is synchronous; add a comment explaining this.
let retryOperationTask = scheduleAnOperation(
kind: .retry(
triggeringContributor: contributor,
errorForSuspendedStatus: error
)
)
changeStatus(to: .suspendedAwaitingStartOfRetryOperation(retryOperationTask: retryOperationTask, error: error))
throw error
case .failed:
// CHA-RL1h4
74 changes: 66 additions & 8 deletions Tests/AblyChatTests/DefaultRoomLifecycleManagerTests.swift
@@ -310,23 +310,51 @@ struct DefaultRoomLifecycleManagerTests {

// @spec CHA-RL1h2
// @specOneOf(1/2) CHA-RL1h1 - tests that an error gets thrown when channel attach fails due to entering SUSPENDED (TODO: but I don’t yet fully understand the meaning of CHA-RL1h1; outstanding question https://github.com/ably/specification/pull/200/files#r1765476610)
// @specPartial CHA-RL1h3 - Have tested the failure of the operation and the error that’s thrown. Have not yet implemented the "enter the recovery loop" (TODO: https://github.com/ably-labs/ably-chat-swift/issues/50)
// @spec CHA-RL1h3
@Test
func attach_whenContributorFailsToAttachAndEntersSuspended_transitionsToSuspended() async throws {
func attach_whenContributorFailsToAttachAndEntersSuspended_transitionsToSuspendedAndPerformsRetryOperation() async throws {
// Given: A DefaultRoomLifecycleManager, one of whose contributors’ call to `attach` fails causing it to enter the SUSPENDED status
let contributorAttachError = ARTErrorInfo(domain: "SomeDomain", code: 123)
var nonFailingContributorsDetachOperations: [SignallableChannelOperation] = []
let contributors = (1 ... 3).map { i in
if i == 1 {
createContributor(attachBehavior: .completeAndChangeState(.failure(contributorAttachError), newState: .suspended))
return createContributor(
attachBehavior: .fromFunction { callCount in
if callCount == 1 {
.completeAndChangeState(.failure(contributorAttachError), newState: .suspended) // The behaviour described above
} else {
.complete(.success) // So the CHA-RL5f RETRY attachment cycle succeeds
}
},

// This is so that the RETRY operation’s wait-to-become-ATTACHED succeeds
subscribeToStateBehavior: .addSubscriptionAndEmitStateChange(
.init(
current: .attached,
previous: .detached, // arbitrary
event: .attached,
reason: nil // arbitrary
)
)
)
} else {
createContributor(attachBehavior: .success)
// These contributors will be detached by the RETRY operation. We want to be able to delay the completion of those detach calls until we have verified that the room’s status is SUSPENDED; that is, we want to defer the RETRY operation’s transition to DETACHED.
let detachOperation = SignallableChannelOperation()
nonFailingContributorsDetachOperations.append(detachOperation)

return createContributor(
attachBehavior: .success, // So the CHA-RL5f RETRY attachment cycle succeeds
detachBehavior: detachOperation.behavior
)
}
}

let manager = await createManager(contributors: contributors)

let statusChangeSubscription = await manager.onRoomStatusChange(bufferingPolicy: .unbounded)
async let maybeSuspendedStatusChange = statusChangeSubscription.suspendedElements().first { _ in true }
let roomStatusChangeSubscription = await manager.onRoomStatusChange(bufferingPolicy: .unbounded)
async let maybeSuspendedRoomStatusChange = roomStatusChangeSubscription.suspendedElements().first { _ in true }

let managerStatusChangeSubscription = await manager.testsOnly_onStatusChange()

// When: `performAttachOperation()` is called on the lifecycle manager
async let roomAttachResult: Void = manager.performAttachOperation()
@@ -336,7 +364,7 @@ struct DefaultRoomLifecycleManagerTests {
// 1. the room status transitions to SUSPENDED, with the status change’s `error` having the AttachmentFailed code corresponding to the feature of the failed contributor, `cause` equal to the error thrown by the contributor `attach` call
// 2. the manager’s `error` is set to this same error
// 3. the room attach operation fails with this same error
let suspendedStatusChange = try #require(await maybeSuspendedStatusChange)
let suspendedRoomStatusChange = try #require(await maybeSuspendedRoomStatusChange)

#expect(await manager.roomStatus.isSuspended)

@@ -347,9 +375,39 @@ struct DefaultRoomLifecycleManagerTests {
roomAttachError = error
}

for error in await [suspendedStatusChange.error, manager.roomStatus.error, roomAttachError] {
for error in await [suspendedRoomStatusChange.error, manager.roomStatus.error, roomAttachError] {
#expect(isChatError(error, withCode: .messagesAttachmentFailed, cause: contributorAttachError))
}

// and:
// 4. The manager asynchronously performs a RETRY operation

// Allow the RETRY operation’s contributor detaches to complete
for detachOperation in nonFailingContributorsDetachOperations {
detachOperation.complete(behavior: .success) // So the CHA-RL5a RETRY detachment cycle succeeds
}

// Wait for the RETRY to finish
let retryOperationTask = try #require(await managerStatusChangeSubscription.compactMap { statusChange in
if case let .suspendedAwaitingStartOfRetryOperation(retryOperationTask, _) = statusChange.current {
return retryOperationTask
}
return nil
}
.first { _ in
true
})

await retryOperationTask.value

// We confirm that a RETRY happened by checking for its expected side effects:
#expect(await contributors[0].channel.detachCallCount == 0) // RETRY doesn’t touch this since it’s the one that triggered the RETRY
#expect(await contributors[1].channel.detachCallCount == 1) // From the CHA-RL5a RETRY detachment cycle
#expect(await contributors[2].channel.detachCallCount == 1) // From the CHA-RL5a RETRY detachment cycle

#expect(await contributors[0].channel.attachCallCount == 2) // From the ATTACH operation and the CHA-RL5f RETRY attachment cycle
#expect(await contributors[1].channel.attachCallCount == 1) // From the CHA-RL5f RETRY attachment cycle
#expect(await contributors[2].channel.attachCallCount == 1) // From the CHA-RL5f RETRY attachment cycle
}

// @specOneOf(2/2) CHA-RL1h1 - tests that an error gets thrown when channel attach fails due to entering FAILED (TODO: but I don’t yet fully understand the meaning of CHA-RL1h1; outstanding question https://github.com/ably/specification/pull/200/files#r1765476610))
