From 5b4b1b70b824c1d9ab97428b1bf0c50c1cf7818b Mon Sep 17 00:00:00 2001 From: Michael Barskii Date: Thu, 8 Aug 2024 11:05:33 -0700 Subject: [PATCH] Improve Samza AM retry count logging (#1701) * LISAMZA-43659 Improve Samza AM retry count logging * Output Current Timestamp at run-class.sh script (#1702) * print current timestamp * Fix typo * fix build issue about grolifant okhttp --------- Co-authored-by: Haolan Ye * Revert "Output Current Timestamp at run-class.sh script (#1702)" This reverts commit 1e84ac05eb70d7b1880bb2a62a03e5e1a4b3ef16. --------- Co-authored-by: Michael Barskii Co-authored-by: Haolan Ye Co-authored-by: Haolan Ye --- .../samza/clustermanager/ContainerProcessManager.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/samza-core/src/main/java/org/apache/samza/clustermanager/ContainerProcessManager.java b/samza-core/src/main/java/org/apache/samza/clustermanager/ContainerProcessManager.java index f8719890ee..756241f062 100644 --- a/samza-core/src/main/java/org/apache/samza/clustermanager/ContainerProcessManager.java +++ b/samza-core/src/main/java/org/apache/samza/clustermanager/ContainerProcessManager.java @@ -572,9 +572,10 @@ void onResourceCompletedWithUnknownStatus(SamzaResourceStatus resourceStatus, St // if fail count is (1 initial failure + max retries) then fail job. if (currentFailCount > retryCount) { - LOG.error("Processor ID: {} (current Container ID: {}) has failed {} times, with last failure {} ms ago. " + - "This is greater than retry count of {} and window of {} ms, ", - processorId, containerId, currentFailCount, durationSinceLastRetryMs, retryCount, retryWindowMs); + LOG.error("Processor ID: {} (current Container ID: {}) has failed {} times. " + + "This is greater than the retry count of {}. "
+ + "The failure occurred {} ms after the previous one, which is less than the retry window of {} ms.", + processorId, containerId, currentFailCount, retryCount, durationSinceLastRetryMs, retryWindowMs); // We have too many failures, and we're within the window // boundary, so reset shut down the app master.