From 419e05b9b1791b0c9ec552ea327bc10f9eaf1112 Mon Sep 17 00:00:00 2001
From: Heitor Lessa
Date: Mon, 11 Dec 2023 14:54:42 +0100
Subject: [PATCH] fix(logger): log non-ascii characters as is when JSON stringifying (#3475)

* fix(parameters): make cache aware of single vs multiple calls

Signed-off-by: heitorlessa

* chore: cleanup, add test for single and nested

Signed-off-by: heitorlessa

* fix(logger): utf-8 encoding json

Signed-off-by: heitorlessa

* chore: add all non_ascii compatible with 3.7+

* chore: mention issue for future debuggability

---------

Signed-off-by: heitorlessa
---
 aws_lambda_powertools/logging/formatter.py |  1 +
 tests/functional/test_logger.py            | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py
index 22419b160d1..8b34b326435 100644
--- a/aws_lambda_powertools/logging/formatter.py
+++ b/aws_lambda_powertools/logging/formatter.py
@@ -133,6 +133,7 @@ def __init__(
             default=self.json_default,
             separators=(",", ":"),
             indent=self.json_indent,
+            ensure_ascii=False,  # see #3474
         )
 
         self.datefmt = datefmt
diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py
index dbe2ed1917f..fc6e5b98ee8 100644
--- a/tests/functional/test_logger.py
+++ b/tests/functional/test_logger.py
@@ -1120,3 +1120,23 @@ def filter(self, record):
     log = capture_multiple_logging_statements_output(stdout)
     assert log[0]["api_key"] == "REDACTED"
     assert log[1]["api_key"] != "REDACTED"
+
+
+def test_logger_json_unicode(stdout, service_name):
+    # GIVEN Logger is initialized
+    logger = Logger(service=service_name, stream=stdout)
+
+    # WHEN all non-ascii chars are logged as messages
+    # AND non-ascii is also used as ephemeral fields
+    # latest: https://www.unicode.org/versions/Unicode15.1.0/#Summary
+    non_ascii_chars = [chr(i) for i in range(128, 111_411_1)]
+    japanese_field = "47業レルし化"
+    japanese_string = "スコビルデモ2"
+
+    logger.info(non_ascii_chars, **{japanese_field: japanese_string})
+
+    # THEN JSON logs should not try to escape them
+    log = capture_logging_output(stdout)
+
+    assert log["message"] == non_ascii_chars
+    assert log[japanese_field] == japanese_string