diff --git a/docs/src/tutorials/logistic_regression.md b/docs/src/tutorials/logistic_regression.md
index 858e907cd5..01f45a5fcd 100644
--- a/docs/src/tutorials/logistic_regression.md
+++ b/docs/src/tutorials/logistic_regression.md
@@ -140,8 +140,8 @@ Note, all the `flux_*` variables in this tutorial would be general, that is, the
 julia> flux_model = Chain(Dense(4 => 3), softmax)
 Chain(
   Dense(4 => 3),                        # 15 parameters
-  NNlib.softmax,
-)
+  softmax,
+)
 ```

 A [`Dense(4 => 3)`](@ref Dense) layer denotes a layer with four inputs (four features in every data point) and three outputs (three classes or labels). This layer is the same as the mathematical model defined by us above. Under the hood, Flux too calculates the output using the same expression, but we don't have to initialize the parameters ourselves this time, instead Flux does it for us.
diff --git a/test/runtests.jl b/test/runtests.jl
index f44c4b7758..ff6660be14 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,9 +11,9 @@ using Functors: fmapstructure_with_path

 ## Uncomment below to change the default test settings
 # ENV["FLUX_TEST_AMDGPU"] = "true"
-ENV["FLUX_TEST_CUDA"] = "true"
+# ENV["FLUX_TEST_CUDA"] = "true"
 # ENV["FLUX_TEST_METAL"] = "true"
-ENV["FLUX_TEST_CPU"] = "false"
+# ENV["FLUX_TEST_CPU"] = "false"
 # ENV["FLUX_TEST_DISTRIBUTED_MPI"] = "true"
 # ENV["FLUX_TEST_DISTRIBUTED_NCCL"] = "true"
 ENV["FLUX_TEST_ENZYME"] = "false" # We temporarily disable Enzyme tests since they are failing
diff --git a/test/train.jl b/test/train.jl
index 38338c19b9..5a1fd0592e 100644
--- a/test/train.jl
+++ b/test/train.jl
@@ -155,18 +155,15 @@ for (trainfn!, name) in ((Flux.train!, "Zygote"), (train_enzyme!, "Enzyme"))
     pen2(x::AbstractArray) = sum(abs2, x)/2
     opt = Flux.setup(Adam(0.1), model)

-    @test begin
-      trainfn!(model, data, opt) do m, x, y
-        err = Flux.mse(m(x), y)
-        l2 = sum(pen2, Flux.params(m))
-        err + 0.33 * l2
-      end
-
-      diff2 = model.weight .- init_weight
-      @test diff1 ≈ diff2
-
-      true
-    end broken = VERSION >= v"1.11"
+    trainfn!(model, data, opt) do m, x, y
+      err = Flux.mse(m(x), y)
+      l2 = sum(pen2, Flux.params(m))
+      err + 0.33 * l2
+    end
+
+    diff2 = model.weight .- init_weight
+    @test diff1 ≈ diff2
+
   end

   # Take 3: using WeightDecay instead. Need the /2 above, to match exactly.
diff --git a/test/utils.jl b/test/utils.jl
index b526b63286..79eebded49 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -273,13 +273,11 @@ end
 @testset "params gradient" begin
   m = (x=[1,2.0], y=[3.0]);

-  @test begin
-    # Explicit -- was broken by #2054 / then fixed / now broken again on julia v1.11
-    gnew = gradient(m -> (sum(norm, Flux.params(m))), m)[1]
-    @test gnew.x ≈ [0.4472135954999579, 0.8944271909999159]
-    @test gnew.y ≈ [1.0]
-    true
-  end broken = VERSION >= v"1.11"
+  # Explicit -- was broken by #2054 / then fixed / now broken again on julia v1.11
+  gnew = gradient(m -> (sum(norm, Flux.params(m))), m)[1]
+  @test gnew.x ≈ [0.4472135954999579, 0.8944271909999159]
+  @test gnew.y ≈ [1.0]
+
   # Implicit
   gold = gradient(() -> (sum(norm, Flux.params(m))), Flux.params(m))
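
For readers skimming the tutorial hunk above: the printed `Chain(...)` is Flux's display of the model, and the only documentation change is that the activation now shows under its exported name `softmax` rather than `NNlib.softmax`. A minimal sketch of what that model computes, assuming Flux is loaded and using a made-up single data point (not part of the diff):

```julia
using Flux

# The model from the tutorial hunk: four input features -> three raw scores -> probabilities.
flux_model = Chain(Dense(4 => 3), softmax)

x = rand(Float32, 4)   # hypothetical data point with four features
ŷ = flux_model(x)      # vector of three class probabilities

sum(ŷ) ≈ 1             # softmax normalises the scores to sum to one
```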