From b1e85034e62608e3d077c18c98e68d2a546f379b Mon Sep 17 00:00:00 2001 From: Chris Bookholt Date: Mon, 27 Mar 2023 19:59:41 -0700 Subject: [PATCH] If crasher input isn't detected, save out entire batch If one-by-one execution is unable to reproduce a detected crash, then it's possible multiple inputs are required to trigger the crash. Rather than trying anything fancy, this CL writes out all of the inputs from a batch that were executed prior to the crash. The intent is to allow manual repro validation. A TODO is added to experiment with techniques to validate whether executing of inputs can reproduce the crash, such as simply re-executing the whole batch. PiperOrigin-RevId: 519897703 --- BUILD | 1 + centipede.cc | 30 ++++++++++- execution_result.h | 2 + testing/centipede_test.cc | 109 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 2 deletions(-) diff --git a/BUILD b/BUILD index addf778..af91974 100644 --- a/BUILD +++ b/BUILD @@ -477,6 +477,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", ], diff --git a/centipede.cc b/centipede.cc index ec7e732..6dc29b0 100644 --- a/centipede.cc +++ b/centipede.cc @@ -58,6 +58,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/str_split.h" #include "absl/synchronization/mutex.h" #include "absl/types/span.h" @@ -763,8 +764,33 @@ void Centipede::ReportCrash(std::string_view binary, LOG(INFO) << log_prefix << "Crash was not observed when running inputs one-by-one"; - // TODO(kcc): [as-needed] there will be cases when several inputs cause a - // crash, but no single input does. Handle this case. + + // There will be cases when several inputs collectively cause a crash, but no + // single input does. Handle this by writing out all inputs from the batch. + + // TODO(bookholt): Check for repro by re-running the whole batch. + // TODO(ussuri): Consolidate logic for test case reproduction. + + const auto &suspect_input = input_vec[suspect_input_idx]; + // Save inputs to <--workdir>/crash/unreliable_batch-. + auto suspect_hash = Hash(suspect_input); + auto crash_dir = env_.MakeCrashReproducerDirPath(); + RemoteMkdir(crash_dir); + std::string save_dir = + std::filesystem::path(crash_dir).append("crashing_batch-"). + concat(suspect_hash); + RemoteMkdir(save_dir); + LOG(INFO) << log_prefix << "Saving used inputs from batch to: " << save_dir; + for (int i = 0; i <= suspect_input_idx; ++i) { + const auto &one_input = input_vec[i]; + auto hash = Hash(one_input); + std::string file_path = std::filesystem::path(save_dir) + .append(absl::StrFormat("input-%010d-%s", i, hash)); + auto *file = RemoteFileOpen(file_path, "w"); + CHECK(file != nullptr) << log_prefix << "Failed to open " << file_path; + RemoteFileAppend(file, one_input); + RemoteFileClose(file); + } } } // namespace centipede diff --git a/execution_result.h b/execution_result.h index 28761a6..b629368 100644 --- a/execution_result.h +++ b/execution_result.h @@ -155,6 +155,8 @@ class BatchResult { } private: + friend class MultiInputMock; + std::vector results_; std::string log_; // log_ is populated optionally, e.g. if there was a crash. int exit_code_ = EXIT_SUCCESS; // Process exit code. diff --git a/testing/centipede_test.cc b/testing/centipede_test.cc index ce549e6..b15f215 100644 --- a/testing/centipede_test.cc +++ b/testing/centipede_test.cc @@ -579,6 +579,115 @@ TEST(Centipede, ExtraBinaries) { Hash({10}), Hash({30}), Hash({50}))); } +namespace { + +// A mock for UndetectedCrashingInput test. +class UndetectedCrashingInputMock : public CentipedeCallbacks { + public: + explicit UndetectedCrashingInputMock(const Environment &env, + size_t crashing_input_idx) + : CentipedeCallbacks{env}, crashing_input_idx_{crashing_input_idx} { + CHECK_LE(crashing_input_idx_, std::numeric_limits::max()); + } + + // Doesn't execute anything. + // Crash when 0th char of input to binary b1 equals 10, but only on 1st exec. + bool Execute(std::string_view binary, const std::vector &inputs, + BatchResult &batch_result) override { + batch_result.ClearAndResize(inputs.size()); + bool res = true; + for (const auto & input : inputs) { + CHECK_EQ(input.size(), 1); // By construction in `Mutate()`. + // The contents of each mutant is its sequantial number. + if (input[0] == crashing_input_idx_) { + if (first_pass_) { + first_pass_ = false; + crashing_input_ = input; + // TODO(b/274705740): `num_outputs_read()` is the number of outputs + // that Centipede engine *expects* to have been read from *the + // current BatchResult* by the *particular* implememtation of + // `CentipedeCallbacks` (and `DefaultCentipedeCallbacks` fits the + // bill). `Centipede::ReportCrash()` then uses this value as a hint + // for the crashing input's index, and in our case saves the batch's + // inputs from 0 up to and including the crasher to a subdir. See the + // bug for details. All of this is horribly convoluted and misplaced + // here. Implement a cleaner solution. + batch_result.num_outputs_read() = + crashing_input_idx_ % env_.batch_size; + res = false; + } + } + } + return res; + } + + // Sets the mutants to different 1-byte values. + void Mutate(const std::vector &inputs, size_t num_mutants, + std::vector &mutants) override { + mutants.resize(num_mutants); + for (auto &mutant : mutants) { + // The contents of each mutant is simply its sequential number. + mutant = {static_cast(curr_input_idx_++)}; + } + } + + // Gets the input that triggered the crash. + ByteArray crashing_input() const { return crashing_input_; } + + private: + const size_t crashing_input_idx_; + size_t curr_input_idx_ = 0; + ByteArray crashing_input_ = {}; + bool first_pass_ = true; +}; + +} // namespace + +// Test for preserving a crashing batch when 1-by-1 exec fails to reproduce. +// Executes one main binary (--binary). +// Expects the binary to crash once and 1-by-1 reproduction to fail. +TEST(Centipede, UndetectedCrashingInput) { + constexpr size_t kNumBatches = 7; + constexpr size_t kBatchSize = 11; + constexpr size_t kCrashingInputIdxInBatch = kBatchSize / 2; + constexpr size_t kCrashingInputIdx = + (kNumBatches / 2) * kBatchSize + kCrashingInputIdxInBatch; + + LOG(INFO) << VV(kNumBatches) << VV(kBatchSize) + << VV(kCrashingInputIdxInBatch) VV(kCrashingInputIdx); + + TempDir temp_dir{test_info_->name()}; + Environment env; + env.workdir = temp_dir.path(); + env.num_runs = kBatchSize * kNumBatches; + env.batch_size = kBatchSize; + // No real binary: prevent attempts by Centipede to read a PCtable from it. + env.require_pc_table = false; + + UndetectedCrashingInputMock mock(env, kCrashingInputIdx); + MockFactory factory(mock); + CentipedeMain(env, factory); + + // Verify that we see the expected inputs from the batch. + // The "crashes/unreliable_batch-" dir must contain all inputs from the + // batch that were executing during the session. + // We simply verify the number of saved inputs matches the number of executed + // inputs. + const auto crashing_input_hash = Hash(mock.crashing_input()); + const auto crashes_dir_path = std::filesystem::path(temp_dir.path()) + .append("crashes") + .append("crashing_batch-") + .concat(crashing_input_hash); + ASSERT_TRUE(std::filesystem::exists(crashes_dir_path)) << crashes_dir_path; + std::vector found_crash_file_names; + for (auto const &dir_ent : + std::filesystem::directory_iterator(crashes_dir_path)) { + found_crash_file_names.push_back(dir_ent.path().filename()); + } + // TODO(ussuri): Verify exact names/contents of the files, not just count. + ASSERT_EQ(found_crash_file_names.size(), kCrashingInputIdxInBatch + 1); +} + static void WriteBlobsToFile(const std::vector &blobs, const std::string_view path) { auto appender = DefaultBlobFileAppenderFactory();