From d4cfc19fe34fb9705e22f9cb7499999222178652 Mon Sep 17 00:00:00 2001
From: sacOO7 <sachinshinde7676@gmail.com>
Date: Mon, 9 Dec 2024 17:13:45 +0530
Subject: [PATCH] [ECO-5095][CHA-RL*] Implement logging for
 RoomLifecycleManager 1. Added private property logger with thread specific
 dynamic context 2. Added trace, debug, error logging for private doRetry
 method 3. Added trace, debug, error logging for attach method 4. Added trace,
 debug, error logging for detach method

---
 .../com/ably/chat/RoomLifecycleManager.kt     | 123 ++++++++++++------
 1 file changed, 85 insertions(+), 38 deletions(-)

diff --git a/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt b/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt
index 4da631bc..c3ead4d8 100644
--- a/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt
+++ b/chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt
@@ -117,8 +117,12 @@ internal class RoomLifecycleManager(
     private val roomScope: CoroutineScope,
     private val statusLifecycle: DefaultRoomLifecycle,
     private val contributors: List<ContributesToRoomLifecycle>,
-    private val logger: Logger,
+    roomLogger: Logger,
 ) {
+    private val logger = roomLogger.withContext(
+        "RoomLifecycleManager",
+        dynamicContext = mapOf("scope" to { Thread.currentThread().name }),
+    )
 
     /**
      * AtomicCoroutineScope makes sure all operations are atomic and run with given priority.
@@ -183,63 +187,84 @@ internal class RoomLifecycleManager(
      * @returns Returns when the room is attached, or the room enters a failed state.
      * Spec: CHA-RL5
      */
-    @Suppress("CognitiveComplexMethod", "ThrowsCount")
+    @Suppress("CognitiveComplexMethod", "ThrowsCount", "LongMethod")
     private suspend fun doRetry(contributor: ContributesToRoomLifecycle) {
+        logger.trace("doRetry();")
+        // Handle the channel wind-down.
+        logger.debug("doRetry(); winding down channels except for feature: ${contributor.featureName}")
         // CHA-RL5a - Handle the channel wind-down for other channels
         var result = kotlin.runCatching { doChannelWindDown(contributor) }
         while (result.isFailure) {
             // CHA-RL5c - If in doing the wind down, we've entered failed state, then it's game over anyway
             if (this.statusLifecycle.status === RoomStatus.Failed) {
+                logger.error("doRetry(); channels wind down failed, RoomStatus is FAILED", result.exceptionOrNull())
                 throw result.exceptionOrNull() ?: IllegalStateException("room is in a failed state")
             }
+            logger.warn("doRetry(); channels wind down failed, retrying in $retryDurationInMs ms", result.exceptionOrNull())
             delay(retryDurationInMs)
             result = kotlin.runCatching { doChannelWindDown(contributor) }
         }
+        logger.debug("doRetry(); channels wind down successfully completed")
 
         // A helper that allows us to retry the attach operation
         val doAttachWithRetry: suspend () -> Unit = {
+            logger.trace("doRetry.doAttachWithRetry();")
             coroutineScope {
+                logger.debug("doRetry.doAttachWithRetry(); attempting to attach all feature channels")
                 statusLifecycle.setStatus(RoomStatus.Attaching)
                 val attachmentResult = doAttach()
 
                 // CHA-RL5c - If we're in failed, then we should wind down all the channels, eventually - but we're done here
                 if (attachmentResult.status === RoomStatus.Failed) {
+                    logger.error("doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Failed", attachmentResult.exception)
                     atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
                         runDownChannelsOnFailedAttach()
                     }
+                    logger.warn("doRetry.doAttachWithRetry(); started winding down all channels internally")
                     return@coroutineScope
                 }
 
                 // If we're in suspended, then we should wait for the channel to reattach, but wait for it to do so
                 if (attachmentResult.status === RoomStatus.Suspended) {
-                    val failedFeature = attachmentResult.failedFeature
-                        ?: throw lifeCycleException(
-                            "no failed feature in doRetry",
-                            ErrorCode.RoomLifecycleError,
-                        )
+                    logger.warn(
+                        "doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Suspended",
+                        attachmentResult.exception,
+                    )
+                    val failedFeature = attachmentResult.failedFeature ?: throw lifeCycleException(
+                        "no failed feature in doRetry",
+                        ErrorCode.RoomLifecycleError,
+                    )
+                    logger.warn("doRetry.doAttachWithRetry(); Retrying attach due to suspended feature: ${failedFeature.featureName}")
                     // No need to catch errors, rather they should propagate to caller method
                     return@coroutineScope doRetry(failedFeature)
                 }
+
                 // We attached, huzzah!
+                logger.debug("doRetry.doAttachWithRetry(); Room ATTACH success, RoomStatus: ${statusLifecycle.status}")
             }
         }
 
         // If given suspended contributor channel has reattached, then we can retry the attach
         if (contributor.channel.state == ChannelState.attached) {
+            logger.debug("doRetry(); feature: ${contributor.featureName} reattached, retrying attach for others")
             return doAttachWithRetry()
         }
 
         // CHA-RL5d - Otherwise, wait for our suspended contributor channel to re-attach and try again
         try {
+            logger.debug("doRetry(); waiting to channel ATTACHED for feature: ${contributor.featureName}")
             listenToChannelAttachOrFailure(contributor)
+            logger.debug("doRetry(); waiting success, feature: ${contributor.featureName} ATTACHED, retrying attach for others")
             delay(retryDurationInMs) // Let other channels get into ATTACHING state
-            // Attach successful
-            return doAttachWithRetry()
-        } catch (ex: AblyException) {
+        } catch (ex: Throwable) {
             // CHA-RL5c - Channel attach failed
-            statusLifecycle.setStatus(RoomStatus.Failed, ex.errorInfo)
+            logger.error("doRetry(); waiting failed for channel attach, feature: ${contributor.featureName}", ex)
+            // No need for channels wind down, since it's taken care at the start of doRetry()
+            statusLifecycle.setStatus(RoomStatus.Failed, (ex as? AblyException)?.errorInfo)
             throw ex
         }
+        // Attach successful, try attach for other channels
+        return doAttachWithRetry()
     }
 
     /**
@@ -292,59 +317,60 @@ internal class RoomLifecycleManager(
      */
     @Suppress("ThrowsCount")
     internal suspend fun attach() {
+        logger.trace("attach();")
         val deferredAttach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL1d
             if (statusLifecycle.status == RoomStatus.Attached) { // CHA-RL1a
+                logger.debug("attach(); room is already attached")
                 return@async
             }
 
             if (statusLifecycle.status == RoomStatus.Releasing) { // CHA-RL1b
-                throw lifeCycleException(
-                    "unable to attach room; room is releasing",
-                    ErrorCode.RoomIsReleasing,
-                )
+                logger.error("attach(); attach failed, room is in releasing state")
+                throw lifeCycleException("unable to attach room; room is releasing", ErrorCode.RoomIsReleasing)
             }
 
             if (statusLifecycle.status == RoomStatus.Released) { // CHA-RL1c
-                throw lifeCycleException(
-                    "unable to attach room; room is released",
-                    ErrorCode.RoomIsReleased,
-                )
+                logger.error("attach(); attach failed, room is in released state")
+                throw lifeCycleException("unable to attach room; room is released", ErrorCode.RoomIsReleased)
             }
 
             // At this point, we force the room status to be attaching
             clearAllTransientDetachTimeouts()
             operationInProgress = true
+            logger.debug("attach(); transitioned room into ATTACHING state")
             statusLifecycle.setStatus(RoomStatus.Attaching) // CHA-RL1e
 
             val attachResult = doAttach()
 
             // CHA-RL1h4 - If we're in a failed state, then we should wind down all the channels, eventually
             if (attachResult.status === RoomStatus.Failed) {
+                logger.error("attach(); failed to attach channels, RoomStatus is Failed", attachResult.exception)
                 // CHA-RL1h5 - detach all remaining channels
                 atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
                     runDownChannelsOnFailedAttach()
                 }
+                logger.warn("attach(); started winding down all channels internally")
                 throw attachResult.exception // CHA-RL1h1
             }
 
             // CHA-RL1h1, CHA-RL1h2 - If we're in suspended, then this attach should fail, but we'll retry after a short delay async
             if (attachResult.status === RoomStatus.Suspended) {
+                logger.warn("attach(); failed to attach channels, RoomStatus is Suspended", attachResult.exception)
                 if (attachResult.failedFeature == null) {
-                    throw lifeCycleException(
-                        "no failed feature in attach",
-                        ErrorCode.RoomLifecycleError,
-                    )
+                    throw lifeCycleException("no failed feature in attach", ErrorCode.RoomLifecycleError)
                 }
                 attachResult.failedFeature?.let {
                     // CHA-RL1h3 - Enter recovery for failed room feature/contributor
                     atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
                         doRetry(it)
                     }
+                    logger.warn("attach(); retrying attach due to suspended feature: ${it.featureName}")
                 }
                 throw attachResult.exception // CHA-RL1h1
             }
 
             // We attached, finally!
+            logger.debug("attach(): room attached successfully")
         }
 
         deferredAttach.await()
@@ -357,12 +383,17 @@ internal class RoomLifecycleManager(
      * Spec: CHA-RL1f, CHA-RL1g, CHA-RL1h
      */
     private suspend fun doAttach(): RoomAttachmentResult {
+        logger.trace("doAttach();")
         val attachResult = DefaultRoomAttachmentResult()
+        logger.debug("doAttach(); trying to attach all features: ${contributors.joinToString { it.featureName }}")
         for (feature in contributors) { // CHA-RL1f - attach each feature sequentially
             try {
+                logger.debug("doAttach(); attaching feature: ${feature.featureName}")
                 feature.channel.attachCoroutine()
                 firstAttachesCompleted[feature] = true
+                logger.debug("doAttach(); attached feature: ${feature.featureName}")
             } catch (ex: Throwable) { // CHA-RL1h - handle channel attach failure
+                logger.error("doAttach(); attach failed for feature: ${feature.featureName}", ex)
                 attachResult.throwable = ex
                 attachResult.failedFeatureField = feature
                 attachResult.errorField = lifeCycleErrorInfo(
@@ -387,17 +418,21 @@ internal class RoomLifecycleManager(
 
                 // Regardless of whether we're suspended or failed, run-down the other channels
                 // The wind-down procedure will take Precedence over any user-driven actions
+                logger.warn("doAttach(); transitioning room into ${attachResult.status.name} state")
                 statusLifecycle.setStatus(attachResult)
                 return attachResult
             }
         }
 
         // CHA-RL1g, We successfully attached all the channels - set our status to attached, start listening changes in channel status
+        logger.debug("doAttach(); attach success for all features: ${contributors.joinToString { it.featureName }}")
+        logger.debug("doAttach(); transitioning room to ATTACHED state")
         this.statusLifecycle.setStatus(attachResult)
         this.operationInProgress = false
 
         // Iterate the pending discontinuity events and trigger them
         for ((contributor, error) in pendingDiscontinuityEvents) {
+            logger.warn("doAttach(); emitting discontinuity event for feature: ${contributor.featureName}")
             contributor.discontinuityDetected(error)
         }
         pendingDiscontinuityEvents.clear()
@@ -407,9 +442,11 @@ internal class RoomLifecycleManager(
     /**
      * If we've failed to attach, then we're in the failed state and all that is left to do is to detach all the channels.
      * Spec: CHA-RL1h5, CHA-RL1h6
-     * @returns Returns only when all channels are detached. Doesn't throw exception.
+     * @returns Returns only when all channels are either detached or failed. Doesn't throw exception.
      */
     private suspend fun runDownChannelsOnFailedAttach() {
+        logger.trace("runDownChannelsOnFailedAttach();")
+        logger.debug("runDownChannelsOnFailedAttach(); attempting to detach all channels")
         // At this point, we have control over the channel lifecycle, so we can hold onto it until things are resolved
         // Keep trying to detach the channels until they're all detached.
         var channelWindDown = kotlin.runCatching { doChannelWindDown() }
@@ -419,6 +456,7 @@ internal class RoomLifecycleManager(
             delay(retryDurationInMs)
             channelWindDown = kotlin.runCatching { doChannelWindDown() }
         }
+        logger.debug("runDownChannelsOnFailedAttach(); success, all channels are either detached or failed")
     }
 
     /**
@@ -431,11 +469,13 @@ internal class RoomLifecycleManager(
      */
     @Suppress("CognitiveComplexMethod", "ComplexCondition")
     private suspend fun doChannelWindDown(except: ContributesToRoomLifecycle? = null) = coroutineScope {
+        logger.trace("doChannelWindDown();")
         contributors.map { contributor: ContributesToRoomLifecycle ->
             async {
                 // CHA-RL5a1 - If its the contributor we want to wait for a conclusion on, then we should not detach it
                 // Unless we're in a failed state, in which case we should detach it
                 if (contributor.channel === except?.channel && statusLifecycle.status !== RoomStatus.Failed) {
+                    logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} as per except param")
                     return@async
                 }
                 // If the room's already in the failed state, or it's releasing, we should not detach a failed channel
@@ -446,12 +486,15 @@ internal class RoomLifecycleManager(
                         ) &&
                     contributor.channel.state === ChannelState.failed
                 ) {
+                    logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} since it's already in failed state")
                     return@async
                 }
 
                 try {
+                    logger.debug("doChannelWindDown(); detaching feature: ${contributor.featureName}")
                     contributor.channel.detachCoroutine()
                 } catch (throwable: Throwable) {
+                    logger.warn("doChannelWindDown(); failed to detach feature: ${contributor.featureName}", throwable)
                     // CHA-RL2h2 - If the contributor is in a failed state and we're not ignoring failed states, we should fail the room
                     if (
                         contributor.channel.state === ChannelState.failed &&
@@ -482,38 +525,35 @@ internal class RoomLifecycleManager(
      */
     @Suppress("ThrowsCount")
     internal suspend fun detach() {
+        logger.trace("detach();")
         val deferredDetach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL2i
             // CHA-RL2a - If we're already detached, this is a no-op
             if (statusLifecycle.status === RoomStatus.Detached) {
+                logger.debug("detach(); room is already detached")
                 return@async
             }
             // CHA-RL2c - If the room is released, we can't detach
             if (statusLifecycle.status === RoomStatus.Released) {
-                throw lifeCycleException(
-                    "unable to detach room; room is released",
-                    ErrorCode.RoomIsReleased,
-                )
+                logger.error("detach(); detach failed, room is in released state")
+                throw lifeCycleException("unable to detach room; room is released", ErrorCode.RoomIsReleased)
             }
 
             // CHA-RL2b - If the room is releasing, we can't detach
             if (statusLifecycle.status === RoomStatus.Releasing) {
-                throw lifeCycleException(
-                    "unable to detach room; room is releasing",
-                    ErrorCode.RoomIsReleasing,
-                )
+                logger.error("detach(); detach failed, room is in releasing state")
+                throw lifeCycleException("unable to detach room; room is releasing", ErrorCode.RoomIsReleasing)
             }
 
             // CHA-RL2d - If we're in failed, we should not attempt to detach
             if (statusLifecycle.status === RoomStatus.Failed) {
-                throw lifeCycleException(
-                    "unable to detach room; room has failed",
-                    ErrorCode.RoomInFailedState,
-                )
+                logger.error("detach(); detach failed, room is in failed state")
+                throw lifeCycleException("unable to detach room; room has failed", ErrorCode.RoomInFailedState)
             }
 
             // CHA-RL2e - We force the room status to be detaching
             operationInProgress = true
             clearAllTransientDetachTimeouts()
+            logger.debug("detach(); transitioned room into DETACHING state")
             statusLifecycle.setStatus(RoomStatus.Detaching)
 
             // CHA-RL2f - We now perform an all-channel wind down.
@@ -524,16 +564,18 @@ internal class RoomLifecycleManager(
     }
 
     /**
-     * Perform a detach.
-     * If detaching a channel fails, we should retry until every channel is either in the detached state, or in the failed state.
+     * Performs detach until every channel gets into either detached or failed state.
      * Spec: CHA-RL2f
      */
     private suspend fun doDetach() {
+        logger.trace("doDetach();")
+        logger.debug("doDetach(); detaching all features: ${contributors.joinToString { it.featureName }}")
         var channelWindDown = kotlin.runCatching { doChannelWindDown() }
         var firstContributorFailedError: AblyException? = null
         while (channelWindDown.isFailure) { // CHA-RL2h
             val err = channelWindDown.exceptionOrNull()
             if (err is AblyException && err.errorInfo?.code != -1 && firstContributorFailedError == null) {
+                logger.error("doDetach(); detach failed for one of the feature, detaching others", err)
                 firstContributorFailedError = err // CHA-RL2h1- First failed contributor error is captured
             }
             delay(retryDurationInMs)
@@ -542,6 +584,8 @@ internal class RoomLifecycleManager(
 
         // CHA-RL2g - If we aren't in the failed state, then we're detached
         if (statusLifecycle.status !== RoomStatus.Failed) {
+            logger.debug("doDetach(); successfully detached all features: ${contributors.joinToString { it.featureName }}")
+            logger.debug("doDetach(); transitioning room to DETACHED state")
             statusLifecycle.setStatus(RoomStatus.Detached)
             return
         }
@@ -564,6 +608,7 @@ internal class RoomLifecycleManager(
      * Spec: CHA-RL3
      */
     internal suspend fun release() {
+        logger.trace("release();")
         val deferredRelease = atomicCoroutineScope.async(LifecycleOperationPrecedence.Release.priority) { // CHA-RL3k
             // CHA-RL3a - If we're already released, this is a no-op
             if (statusLifecycle.status === RoomStatus.Released) {
@@ -595,6 +640,7 @@ internal class RoomLifecycleManager(
      *  Spec: CHA-RL3f, CHA-RL3d
      */
     private suspend fun releaseChannels() {
+        logger.trace("releaseChannels();")
         var contributorsReleased = kotlin.runCatching { doRelease() }
         while (contributorsReleased.isFailure) {
             // Wait a short period and then try again
@@ -610,6 +656,7 @@ internal class RoomLifecycleManager(
      */
     @Suppress("RethrowCaughtException")
     private suspend fun doRelease() = coroutineScope {
+        logger.trace("doRelease();")
         contributors.map { contributor: ContributesToRoomLifecycle ->
             async {
                 // CHA-RL3e - Failed channels, we can ignore