Fix delay of user data BUT introduce issue around ⚠️not respecting parent cancellation⚠️ due to SupervisorJob()

If we attempt to create create scope with SupervisorJob() we end up with a scope that does not respect parent cancellation.

Example Illustrated below, where once we cancel parent job, our background worker continues to work.

GT-Sandbox-Snapshot

Code

package com.glassthought.sandbox

import com.glassthought.sandbox.util.out.impl.out
import gt.sandbox.util.output.Emojis
import gt.sandbox.util.output.Out
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineExceptionHandler
import kotlinx.coroutines.CoroutineName
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.async
import kotlinx.coroutines.cancel
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import kotlin.time.Duration.Companion.milliseconds
import kotlin.time.Duration.Companion.seconds

private suspend fun mainImpl(out: Out) {
  val metricsScope = createMetricsScope(out)

  val useCase = GetUserDataUseCase(UserDataRepository(), metricsScope)

  val userData = useCase.getUserData()

  out.info("User data that can be used by user: $userData")
  delay(3000.milliseconds)
}

private suspend fun createMetricsScope(out: Out): CoroutineScope {
  val coroutineExceptionHandler = CoroutineExceptionHandler { coroutineContext, throwable ->
    runBlocking {
      out.warn("background failure handled, failure_message=[" + throwable.message + "] in coroutine_context=[" + coroutineContext + "]")
    }
  }

  // [⚠️ +Job()/+SupervisorJob() must-inherit-from-parent to respect parent cancellation ⚠️](http://www.glassthought.com/notes/678f18et3ek7l31u2g5kxte) just creating a SupervisorJob() is prblematic, since it does not inherit from parent scope. and will not cancel children on parent cancellation.
  return CoroutineScope(
    SupervisorJob()
        + CoroutineName("MetricsScope")
        + coroutineExceptionHandler
  )
}

class GetUserDataUseCase(
  private val repo: UserDataRepository,
  private val metricsScope: CoroutineScope,
) {
  suspend fun getUserData() = coroutineScope {
    val name = async(CoroutineName("getName()")) { repo.getName() }
    val friends = async(CoroutineName("getFriends()")) { repo.getFriends() }
    val profile = async(CoroutineName("getProfile()")) { repo.getProfile() }

    // Await to get all the user data - ESSENTIAL call!
    val user = User(
      name = name.await(),
      friends = friends.await(),
      profile = profile.await()
    )

    out.info("We have all the required user data, let's just notify metrics...")

    // Notify metric system that we got the user data. On another scope.
    metricsScope.launch(
      CoroutineName("notifyMetricsOfUserDataLoaded()")
    ) { repo.notifyMetricsOfUserDataLoaded() }

    user
  }
}


fun main(): Unit {

  // NOTE: nested runBlocking is an ANTI-PATTERN! but it makes it easier to
  // illustrate the issue with SupervisorJob() not inheriting from parent scope.

  // runBlocking-l1
  runBlocking(CoroutineName("runBlocking-l1")) {
    try {
      // runBlocking-l2
      runBlocking(CoroutineName("runBlocking-l2")) {
        val runBlockingL2 = this

        launch {
          out.actionWithMsg(
            "cancel runBlocking-l2 scope", {
              runBlockingL2.cancel(
                CancellationException("cancel entire runBlocking-l2 scope")
              )
            }, delayDuration = 2.seconds
          )
        }

        try {
          mainImpl(out)

          out.info("DONE - runBlocking-l2 - mainImpl() finished without exceptions!")
        } catch (e: Exception) {
          out.error("EXCEPTION AT runBlocking-l2 [${e::class.simpleName}] with msg=[${e.message}]!")
        }
      }
    } catch (e: Exception) {
      out.error("runBlocking-l2 threw error that we caught [${e::class.simpleName}] with msg=[${e.message}] (inRunBlocking-l1)!")
    }

    out.delayNamed(5.seconds, "Delay for background scope to finish")
  }


}

class UserDataRepository {
  suspend fun getName(): String {

    return out.actionWithMsg(
      "getName()", { "Tom" }, 500.milliseconds
    )
  }

  suspend fun getFriends() {
    return out.actionWithMsg(
      "getFriends()", { listOf("Alice", "Bob") }, 1000.milliseconds
    )
  }

  suspend fun getProfile() {

    return out.actionWithMsg(
      "getProfile()", { "Profile of Tom" }, 1500.milliseconds
    )
  }

  suspend fun notifyMetricsOfUserDataLoaded() {
    return out.actionWithMsg(
      "${Emojis.TURTLE}notifyMetricsOfUserDataLoaded()${Emojis.TURTLE}", {

        throw MyRuntimeException.create(
          "metric-system-is-down",
          out
        )
      }, 5000.milliseconds
    )
  }
}


data class User(val name: String, val friends: Any, val profile: Any)

Command to reproduce:

gt.sandbox.checkout.commit d501d9b796c8f36becb3 \
&& cd "${GT_SANDBOX_REPO}" \
&& cmd.run.announce "./gradlew run --quiet"

Recorded output of command:

Picked up JAVA_TOOL_OPTIONS: -Dkotlinx.coroutines.debug
Picked up JAVA_TOOL_OPTIONS: -Dkotlinx.coroutines.debug
[INFO][elapsed:   28ms][🥇][①][coroutname:@runBlocking-l2#3][tname:main/tid:1] [>] Starting action=[cancel runBlocking-l2 scope] with delay before action of [2000 ms]
[INFO][elapsed:   40ms][🥇][⓶][coroutname:@getName()#4][tname:main/tid:1] [>] Starting action=[getName()] with delay before action of [500 ms]
[INFO][elapsed:   40ms][🥇][⓷][coroutname:@getFriends()#5][tname:main/tid:1] [>] Starting action=[getFriends()] with delay before action of [1000 ms]
[INFO][elapsed:   40ms][🥇][⓸][coroutname:@getProfile()#6][tname:main/tid:1] [>] Starting action=[getProfile()] with delay before action of [1500 ms]
[INFO][elapsed:  541ms][🥇][⓶][coroutname:@getName()#4][tname:main/tid:1] [<] Finished action=[getName()].
[INFO][elapsed: 1041ms][🥇][⓷][coroutname:@getFriends()#5][tname:main/tid:1] [<] Finished action=[getFriends()].
[INFO][elapsed: 1541ms][🥇][⓸][coroutname:@getProfile()#6][tname:main/tid:1] [<] Finished action=[getProfile()].
[INFO][elapsed: 1541ms][🥇][⓹][coroutname:@runBlocking-l2#2][tname:main/tid:1] We have all the required user data, let's just notify metrics...
[INFO][elapsed: 1547ms][2️⃣][⓺][coroutname:@notifyMetricsOfUserDataLoaded()#7][tname:DefaultDispatcher-worker-1/tid:31] [>] Starting action=[🐢notifyMetricsOfUserDataLoaded()🐢] with delay before action of [5000 ms]
[INFO][elapsed: 1548ms][🥇][⓹][coroutname:@runBlocking-l2#2][tname:main/tid:1] User data that can be used by user: User(name=Tom, friends=kotlin.Unit, profile=kotlin.Unit)
[INFO][elapsed: 2040ms][🥇][①][coroutname:@runBlocking-l2#3][tname:main/tid:1] [<] Finished action=[cancel runBlocking-l2 scope].
[ERROR][elapsed: 2084ms][🥇][⓹][coroutname:@runBlocking-l2#2][tname:main/tid:1] EXCEPTION AT runBlocking-l2 [CancellationException] with msg=[cancel entire runBlocking-l2 scope]!
[ERROR][elapsed: 2084ms][🥇][⓻][coroutname:@runBlocking-l1#1][tname:main/tid:1] runBlocking-l2 threw error that we caught [CancellationException] with msg=[cancel entire runBlocking-l2 scope] (inRunBlocking-l1)!
[INFO][elapsed: 2085ms][🥇][⓻][coroutname:@runBlocking-l1#1][tname:main/tid:1] Delaying for 5s what_for=[Delay for background scope to finish]
[WARN][elapsed: 6549ms][2️⃣][⓺][coroutname:@notifyMetricsOfUserDataLoaded()#7][tname:DefaultDispatcher-worker-1/tid:31]    💥 throwing exception=[MyRuntimeException] with msg=[metric-system-is-down]
[WARN][elapsed: 6550ms][2️⃣][⓺][coroutname:@notifyMetricsOfUserDataLoaded()#7][tname:DefaultDispatcher-worker-1/tid:31] [<][💥] Finished action=[🐢notifyMetricsOfUserDataLoaded()🐢], threw exception of type=[MyRuntimeException].
[WARN][elapsed: 6551ms][2️⃣][⓼][coroutname:@coroutine#8][tname:DefaultDispatcher-worker-1/tid:31] background failure handled, failure_message=[metric-system-is-down] in coroutine_context=[[com.glassthought.sandbox.MainKt$createMetricsScope$$inlined$CoroutineExceptionHandler$1@74c218a0, CoroutineName(notifyMetricsOfUserDataLoaded()), CoroutineId(7), "notifyMetricsOfUserDataLoaded()#7":StandaloneCoroutine{Cancelling}@40ca41e1, Dispatchers.Default]]
[INFO][elapsed: 7085ms][🥇][⓻][coroutname:@runBlocking-l1#1][tname:main/tid:1] Done delaying for 5s what_for=[Delay for background scope to finish]

We dive into this issue more in +Job()/+SupervisorJob() must-inherit-from-parent or it will ⚠️not respect parent cancellation⚠️


Backlinks