From 510418c132123358ca914ce2be7c7a28ea3fcade Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 5 Nov 2023 12:09:28 +0100 Subject: [PATCH 1/5] trace2: redact passwords from https:// URLs by default It is an unsafe practice to call something like git clone https://user:password@example.com/ This not only risks leaking the password "over the shoulder" or into the readline history of the current Unix shell, it also gets logged via Trace2 if enabled. Let's at least avoid logging such secrets via Trace2, much like we avoid logging secrets in `http.c`. Much like the code in `http.c` is guarded via `GIT_TRACE_REDACT` (defaulting to `true`), we guard the new code via `GIT_TRACE2_REDACT` (also defaulting to `true`). Signed-off-by: Johannes Schindelin --- t/t0210-trace2-normal.sh | 18 ++++++ trace2.c | 120 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 3 deletions(-) diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index 80e76a4695ed8d..b9adc94aab4165 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -283,4 +283,22 @@ test_expect_success 'using global config with include' ' test_cmp expect actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r trace.normal unredacted.normal clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2="$(pwd)/trace.normal" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.normal && + + GIT_TRACE2_REDACT=0 GIT_TRACE2="$(pwd)/unredacted.normal" \ + git clone https://user:pwd@example.com/ clone2 && + grep "start .* clone https://user:pwd@example.com" unredacted.normal && + grep "remote.origin.url=https://user:pwd@example.com" unredacted.normal +' + test_done diff --git a/trace2.c b/trace2.c index 6dc74dff4c7320..87d9a3a036129b 100644 --- a/trace2.c +++ b/trace2.c @@ -20,6 +20,7 @@ #include "trace2/tr2_tmr.h" static int trace2_enabled; +static int trace2_redact = 1; static int tr2_next_child_id; /* modify under lock */ static int tr2_next_exec_id; /* modify under lock */ @@ -227,6 +228,8 @@ void trace2_initialize_fl(const char *file, int line) if (!tr2_tgt_want_builtins()) return; trace2_enabled = 1; + if (!git_env_bool("GIT_TRACE2_REDACT", 1)) + trace2_redact = 0; tr2_sid_get(); @@ -247,12 +250,93 @@ int trace2_is_enabled(void) return trace2_enabled; } +/* + * Redacts an argument, i.e. ensures that no password in + * https://user:password@host/-style URLs is logged. + * + * Returns the original if nothing needed to be redacted. + * Returns a pointer that needs to be `free()`d otherwise. + */ +static const char *redact_arg(const char *arg) +{ + const char *p, *colon; + size_t at; + + if (!trace2_redact || + (!skip_prefix(arg, "https://", &p) && + !skip_prefix(arg, "http://", &p))) + return arg; + + at = strcspn(p, "@/"); + if (p[at] != '@') + return arg; + + colon = memchr(p, ':', at); + if (!colon) + return arg; + + return xstrfmt("%.*s:%s", (int)(colon - arg), arg, p + at); +} + +/* + * Redacts arguments in an argument list. + * + * Returns the original if nothing needed to be redacted. + * Otherwise, returns a new array that needs to be released + * via `free_redacted_argv()`. + */ +static const char **redact_argv(const char **argv) +{ + int i, j; + const char *redacted = NULL; + const char **ret; + + if (!trace2_redact) + return argv; + + for (i = 0; argv[i]; i++) + if ((redacted = redact_arg(argv[i])) != argv[i]) + break; + + if (!argv[i]) + return argv; + + for (j = 0; argv[j]; j++) + ; /* keep counting */ + + ALLOC_ARRAY(ret, j + 1); + ret[j] = NULL; + + for (j = 0; j < i; j++) + ret[j] = argv[j]; + ret[i] = redacted; + for (++i; argv[i]; i++) { + redacted = redact_arg(argv[i]); + ret[i] = redacted ? redacted : argv[i]; + } + + return ret; +} + +static void free_redacted_argv(const char **redacted, const char **argv) +{ + int i; + + if (redacted != argv) { + for (i = 0; argv[i]; i++) + if (redacted[i] != argv[i]) + free((void *)redacted[i]); + free((void *)redacted); + } +} + void trace2_cmd_start_fl(const char *file, int line, const char **argv) { struct tr2_tgt *tgt_j; int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return; @@ -260,10 +344,14 @@ void trace2_cmd_start_fl(const char *file, int line, const char **argv) us_now = getnanotime() / 1000; us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_start_fl) tgt_j->pfn_start_fl(file, line, us_elapsed_absolute, - argv); + redacted); + + free_redacted_argv(redacted, argv); } void trace2_cmd_exit_fl(const char *file, int line, int code) @@ -409,6 +497,7 @@ void trace2_child_start_fl(const char *file, int line, int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **orig_argv = cmd->args.v; if (!trace2_enabled) return; @@ -419,10 +508,24 @@ void trace2_child_start_fl(const char *file, int line, cmd->trace2_child_id = tr2tls_locked_increment(&tr2_next_child_id); cmd->trace2_child_us_start = us_now; + /* + * The `pfn_child_start_fl` API takes a `struct child_process` + * rather than a simple `argv` for the child because some + * targets make use of the additional context bits/values. So + * temporarily replace the original argv (inside the `strvec`) + * with a possibly redacted version. + */ + cmd->args.v = redact_argv(orig_argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_child_start_fl) tgt_j->pfn_child_start_fl(file, line, us_elapsed_absolute, cmd); + + if (cmd->args.v != orig_argv) { + free_redacted_argv(cmd->args.v, orig_argv); + cmd->args.v = orig_argv; + } } void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, @@ -493,6 +596,7 @@ int trace2_exec_fl(const char *file, int line, const char *exe, int exec_id; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return -1; @@ -502,10 +606,14 @@ int trace2_exec_fl(const char *file, int line, const char *exe, exec_id = tr2tls_locked_increment(&tr2_next_exec_id); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_exec_fl) tgt_j->pfn_exec_fl(file, line, us_elapsed_absolute, - exec_id, exe, argv); + exec_id, exe, redacted); + + free_redacted_argv(redacted, argv); return exec_id; } @@ -637,13 +745,19 @@ void trace2_def_param_fl(const char *file, int line, const char *param, { struct tr2_tgt *tgt_j; int j; + const char *redacted; if (!trace2_enabled) return; + redacted = redact_arg(value); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_param_fl) - tgt_j->pfn_param_fl(file, line, param, value, kvi); + tgt_j->pfn_param_fl(file, line, param, redacted, kvi); + + if (redacted != value) + free((void *)redacted); } void trace2_def_repo_fl(const char *file, int line, struct repository *repo) From 21b2850abe1b1f1909e832b8453696b38324d7a7 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 6 Nov 2023 16:59:34 -0500 Subject: [PATCH 2/5] t0211: test URL redacting in PERF format Signed-off-by: Jeff Hostetler --- t/t0211-trace2-perf.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index cfba6861322e37..06ab4edf14f9e8 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -268,4 +268,23 @@ test_expect_success PTHREADS 'global counter test/test2' ' have_counter_event "main" "counter" "test" "test2" 60 actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r actual trace.perf unredacted.perf clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2_PERF="$(pwd)/trace.perf" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.perf && + + GIT_TRACE2_REDACT=0 GIT_TRACE2_PERF="$(pwd)/unredacted.perf" \ + git clone https://user:pwd@example.com/ clone2 && + perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" actual && + grep "d0|main|start|.* clone https://user:pwd@example.com" actual && + grep "d0|main|def_param|.*|remote.origin.url:https://user:pwd@example.com" actual +' + test_done From a9f8c8866d91263b622f7ae8b20373dd8072661b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 6 Nov 2023 16:50:36 -0500 Subject: [PATCH 3/5] trace2: fix signature of trace2_def_param() macro Add `struct key_value_info` argument to `trace2_def_param()`. In dc90208497 (trace2: plumb config kvi, 2023-06-28) a `kvi` argument was added to `trace2_def_param_fl()` but the macro was not up updated. Let's fix that. Signed-off-by: Jeff Hostetler --- trace2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trace2.h b/trace2.h index 40d8c2e02a5e50..1f0669bbd2d4f0 100644 --- a/trace2.h +++ b/trace2.h @@ -337,8 +337,8 @@ struct key_value_info; void trace2_def_param_fl(const char *file, int line, const char *param, const char *value, const struct key_value_info *kvi); -#define trace2_def_param(param, value) \ - trace2_def_param_fl(__FILE__, __LINE__, (param), (value)) +#define trace2_def_param(param, value, kvi) \ + trace2_def_param_fl(__FILE__, __LINE__, (param), (value), (kvi)) /* * Tell trace2 about a newly instantiated repo object and assign From 40dbb6e14e5fea44be646bd5d646e2044fb36f45 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 6 Nov 2023 17:00:18 -0500 Subject: [PATCH 4/5] t0212: test URL redacting in EVENT format Signed-off-by: Jeff Hostetler --- t/helper/test-trace2.c | 55 +++++++++++++++++++++++++++++++++++++++++ t/t0212-trace2-event.sh | 40 ++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/t/helper/test-trace2.c b/t/helper/test-trace2.c index d5ca0046c89fd4..1adac29a575254 100644 --- a/t/helper/test-trace2.c +++ b/t/helper/test-trace2.c @@ -412,6 +412,56 @@ static int ut_201counter(int argc, const char **argv) return 0; } +static int ut_300redact_start(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_cmd_start(argv); + + return 0; +} + +static int ut_301redact_child_start(int argc, const char **argv) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + int k; + + if (!argc) + die("expect "); + + for (k = 0; argv[k]; k++) + strvec_push(&cmd.args, argv[k]); + + trace2_child_start(&cmd); + + strvec_clear(&cmd.args); + + return 0; +} + +static int ut_302redact_exec(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_exec(argv[0], &argv[1]); + + return 0; +} + +static int ut_303redact_def_param(int argc, const char **argv) +{ + struct key_value_info kvi = KVI_INIT; + + if (argc < 2) + die("expect "); + + trace2_def_param(argv[0], argv[1], &kvi); + + return 0; +} + /* * Usage: * test-tool trace2 @@ -438,6 +488,11 @@ static struct unit_test ut_table[] = { { ut_200counter, "200counter", " [ [ [...]]]" }, { ut_201counter, "201counter", " " }, + + { ut_300redact_start, "300redact_start", "" }, + { ut_301redact_child_start, "301redact_child_start", "" }, + { ut_302redact_exec, "302redact_exec", " " }, + { ut_303redact_def_param, "303redact_def_param", " " }, }; /* clang-format on */ diff --git a/t/t0212-trace2-event.sh b/t/t0212-trace2-event.sh index 6d3374ff773c1e..147643d582643e 100755 --- a/t/t0212-trace2-event.sh +++ b/t/t0212-trace2-event.sh @@ -323,4 +323,44 @@ test_expect_success 'discard traces when there are too many files' ' head -n2 trace_target_dir/git-trace2-discard | tail -n1 | grep \"event\":\"too_many_files\" ' +# In the following "...redact..." tests, skip testing the GIT_TRACE2_REDACT=0 +# case because we would need to exactly model the full JSON event stream like +# we did in the basic tests above and I do not think it is worth it. + +test_expect_success 'unsafe URLs are redacted by default in cmd_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 300redact_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in child_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 301redact_child_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in exec events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 302redact_exec git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in def_param events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 303redact_def_param url https://user:pwd@example.com/ && + ! grep user:pwd trace.event +' + test_done From f85d0239b07d89a36729ae4469a228a8dcb20f78 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 6 Nov 2023 17:38:47 -0500 Subject: [PATCH 5/5] t0210,t0211: unset SANITIZE_LEAK Turn off TEST_PASSES_SANITIZE_LEAK in t0210 and t0211 tests. The tests added in a previous commit to confirm that we redact URLs in the Trace2 output uncovered leaks in `builtin/clone.c` and `remote.c`. We observed that (1) `the_repository->remote_status` is not released properly. And (2) that we are using `url...insteadOf` and that runs into a code path where an allocated URL is replaced with another URL. And (3) `remote_states` contains plenty of `struct remote`s whose refspecs seem to be usually allocated by never released. More investigation is needed here to identify the exact cause and proper fixes these leaks / bugs. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- t/t0210-trace2-normal.sh | 2 +- t/t0211-trace2-perf.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index b9adc94aab4165..c312657a12cd34 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (normal target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index 06ab4edf14f9e8..290b6eaaab1605 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (perf target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test.