diff --git a/documentation/Built-in-Shutdown-Service.md b/documentation/Built-in-Shutdown-Service.md index 56959c7f..ceb42613 100644 --- a/documentation/Built-in-Shutdown-Service.md +++ b/documentation/Built-in-Shutdown-Service.md @@ -68,9 +68,14 @@ specifying a process exit status and final messages like this: :messages [["Unexpected filesystem error ..." *err*]]}} ``` -which will finally be thrown from `run` as an `ex-info` of `:kind` -`:puppetlabs.trapperkepper.core/exit` like this: - +This map is exactly the same map that can be thrown from an `init` or +`start` method via `ex-info` to initiate an immediate shutdown. +(Calls to `request-shutdown` only trigger a shutdown later, currently +after all of the services have been initialized and started.) + +Whether via `request-shutdown` or a throw from `init` or `start`, a +shutdown request will eventually cause `run` to throw an `ex-info` of +`:kind` `:puppetlabs.trapperkepper.core/exit` like this: ```clj {:kind :puppetlabs.trapperkepper.core/exit` diff --git a/documentation/Defining-Services.md b/documentation/Defining-Services.md index cfdc7493..9d7b7719 100644 --- a/documentation/Defining-Services.md +++ b/documentation/Defining-Services.md @@ -31,6 +31,9 @@ The default implementation of the lifecycle functions is to simply return the se Trapperkeeper will call the lifecycle functions in order based on the dependency list of the services; in other words, if your service has a dependency on service `Foo`, you are guaranteed that `Foo`'s `init` function will be called prior to yours, and that your `stop` function will be called prior to `Foo`'s. +If an exception is thrown by `init` or `start`, Trapperkeeper will +[initiate an immediate shutdown](Error-Handling.md). + ### Example Service Let's look at a concrete example: diff --git a/documentation/Error-Handling.md b/documentation/Error-Handling.md index 183e40e0..695d8846 100644 --- a/documentation/Error-Handling.md +++ b/documentation/Error-Handling.md @@ -6,6 +6,19 @@ If the `init` or `start` function of any service throws a `Throwable`, it will c If the `init` or `start` function of your service launches a background thread to perform some costly initialization computations (like, say, populating a pool of objects which are expensive to create), it is advisable to wrap that computation inside a call to `shutdown-on-error`; however, you should note that `shutdown-on-error` does *not* short-circuit Trapperkeeper's start-up sequence - the app will continue booting. The `init` and `start` functions of all services will still be run, and once that has completed, all `stop` functions will be called, and the process will terminate. +If the exception thrown by `init` or `start` is an `ex-info` exception +containing the same kind of map that +[`request-shutdown`](Built-in-Shutdown-Service.md#request-shutdown) +accepts, then Trapperkeeper will print the specified messages and exit +with the specified status as described there. For example: + + (ex-info "" + {:kind :puppetlabs.trapperkepper.core/exit` + {:status 3 + :messages [["Unexpected filesystem error ..." *err*]]}}) + +The `ex-info` message string is currently ignored. + ## Services Should Fail Fast Trapperkeeper embraces fail-fast behavior. With that in mind, we advise writing services that also fail-fast. In particular, if your service needs to spin-off a background thread to perform some expensive initialization logic, it is a best practice to push as much code as possible outside of the background thread (for example, validating configuration data), because `Throwables` on the main thread will propagate out of `init` or `start` and cause the application to shut down - i.e., it will *fail fast*. There are different operational semantics for errors thrown on a background thread (see previous section). diff --git a/src/puppetlabs/trapperkeeper/internal.clj b/src/puppetlabs/trapperkeeper/internal.clj index cdb1ceca..1cd220f8 100644 --- a/src/puppetlabs/trapperkeeper/internal.clj +++ b/src/puppetlabs/trapperkeeper/internal.clj @@ -176,6 +176,29 @@ required []] (first (ks/cli! cli-args specs required)))) +(def exit-request-schema + "A process exit request like + {:status 7 + :messages [[\"something for stderr\n\" *err*]] + [\"something for stdout\n\" *out*]] + [\"something else for stderr\n\" *err*]]" + {:status schema/Int + :messages [[(schema/one schema/Str "message") + (schema/one java.io.Writer "stream")]]}) + +(defn exit-exception? [ex] + (and (instance? ExceptionInfo ex) + (not (schema/check {(schema/optional-key :puppetlabs.trapperkeeper.core/exit) + exit-request-schema} + (ex-data ex))))) + +(defn shutdown-reason-for-ex + [exception] + (if (exit-exception? exception) + (merge {:cause :requested} + (select-keys (ex-data exception) [:puppetlabs.trapperkeeper.core/exit])) + {:cause :service-error :error exception})) + (schema/defn ^:always-validate run-lifecycle-fn! "Run a lifecycle function for a service. Required arguments: @@ -234,9 +257,15 @@ (log/debug (i18n/trs "Finished running lifecycle function ''{0}'' for service ''{1}''" lifecycle-fn-name service-id))) - (catch Throwable t - (log/error t (i18n/trs "Error during service {0}!!!" lifecycle-fn-name)) - (throw t)))) + (catch ExceptionInfo ex + (if (exit-exception? ex) + (log/info (i18n/trs "Immediate shutdown requested during service {0}" + lifecycle-fn-name)) + (log/error ex (i18n/trs "Error during service {0}!!!" lifecycle-fn-name))) + (throw ex)) + (catch Throwable ex + (log/error ex (i18n/trs "Error during service {0}!!!" lifecycle-fn-name)) + (throw ex)))) (schema/defn ^:always-validate initialize-lifecycle-worker :- (schema/protocol async-prot/Channel) "Initializes a 'worker' which will listen for lifecycle-related tasks and perform @@ -286,9 +315,7 @@ (log/debug (i18n/trs "Lifecycle worker completed {0} lifecycle task; awaiting next task." type)) (catch Exception e (log/debug e (i18n/trs "Exception caught in lifecycle worker loop")) - (deliver shutdown-reason-promise - {:cause :service-error - :error e}))) + (deliver shutdown-reason-promise (shutdown-reason-for-ex e)))) (recur)) (do @@ -345,16 +372,6 @@ ;;;; regarding the cause of the shutdown, and is intended to be passed back ;;;; in to the top-level functions that perform various shutdown steps. -(def exit-request-schema - "A process exit request like - {:status 7 - :messages [[\"something for stderr\n\" *err*]] - [\"something for stdout\n\" *out*]] - [\"something else for stderr\n\" *err*]]" - {:status schema/Int - :messages [[(schema/one schema/Str "message") - (schema/one java.io.Writer "stream")]]}) - (def ^{:private true :doc "The possible causes for shutdown to be initiated."} shutdown-causes #{:requested :service-error :jvm-shutdown-hook}) @@ -635,8 +652,7 @@ (inc-restart-counter! this) this (catch Throwable t - (deliver shutdown-reason-promise {:cause :service-error - :error t}))))))) + (deliver shutdown-reason-promise (shutdown-reason-for-ex t)))))))) (schema/defn ^:always-validate boot-services-for-app** "Boots services for a TK app. WARNING: This should only ever be called @@ -648,8 +664,7 @@ (a/init app) (a/start app) (catch Throwable t - (deliver shutdown-reason-promise {:cause :service-error - :error t}))) + (deliver shutdown-reason-promise (shutdown-reason-for-ex t)))) (deliver result-promise app))) (schema/defn ^:always-validate boot-services-for-app* :- (schema/protocol a/TrapperkeeperApp) diff --git a/test/puppetlabs/trapperkeeper/internal_test.clj b/test/puppetlabs/trapperkeeper/internal_test.clj index abe0f6c1..4a89ae6d 100644 --- a/test/puppetlabs/trapperkeeper/internal_test.clj +++ b/test/puppetlabs/trapperkeeper/internal_test.clj @@ -108,4 +108,41 @@ (tk-app/stop app) ;; and make sure that we got one last :stop (is (= (conj expected-lifecycle-events :stop) - @lifecycle-events))))) \ No newline at end of file + @lifecycle-events))))) + +(deftest test-immediate-shutdown-exceptions + (let [shutdown #(throw + (ex-info "Shutting down" + {:puppetlabs.trapperkeeper.core/exit + {:status 42 + :messages [["Something on stdderr" *err*]]}})) + service-that-shuts-down-from + (fn [stage events] + (tk/service + [] + (init [this context] + (swap! events conj :init) + (when (= :init stage) (shutdown)) + context) + (start [this context] + (swap! events conj :start) + (when (= :start stage) (shutdown)) + context) + (stop [this context] + (swap! events conj :stop) + context))) + config-fn (constantly {}) + test-stage + (fn test-stage [stage expected-events] + (let [events (atom []) + svc (service-that-shuts-down-from stage events) + app (internal/build-app* [svc] config-fn) + main-thread (future (internal/boot-services-for-app* app))] + @main-thread + (is (= expected-events @events)) + (let [{:keys [cause :puppetlabs.trapperkeeper.core/exit]} + (internal/get-app-shutdown-reason app)] + (is (= cause :requested)) + (is (= 42 (:status exit))))))] + (test-stage :init [:init]) + (test-stage :start [:init :start])))