Skip to content
This repository has been archived by the owner on Oct 11, 2023. It is now read-only.

Commit

Permalink
If crasher input isn't detected, save out entire batch
Browse files Browse the repository at this point in the history
If one-by-one execution is unable to reproduce a detected crash, then it's possible multiple inputs are required to trigger the crash.

Rather than trying anything fancy, this CL writes out all of the inputs from a batch that were executed prior to the crash. The intent is to allow manual repro validation.

A TODO is added to experiment with techniques to validate whether executing of inputs can reproduce the crash, such as simply re-executing the whole batch.

PiperOrigin-RevId: 519897703
  • Loading branch information
bookholt authored and copybara-github committed Mar 28, 2023
1 parent 65d81ac commit b1e8503
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 2 deletions.
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ cc_library(
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/types:span",
],
Expand Down
30 changes: 28 additions & 2 deletions centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_split.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
Expand Down Expand Up @@ -763,8 +764,33 @@ void Centipede::ReportCrash(std::string_view binary,

LOG(INFO) << log_prefix
<< "Crash was not observed when running inputs one-by-one";
// TODO(kcc): [as-needed] there will be cases when several inputs cause a
// crash, but no single input does. Handle this case.

// There will be cases when several inputs collectively cause a crash, but no
// single input does. Handle this by writing out all inputs from the batch.

// TODO(bookholt): Check for repro by re-running the whole batch.
// TODO(ussuri): Consolidate logic for test case reproduction.

const auto &suspect_input = input_vec[suspect_input_idx];
// Save inputs to <--workdir>/crash/unreliable_batch-<HASH_OF_SUSPECT_INPUT>.
auto suspect_hash = Hash(suspect_input);
auto crash_dir = env_.MakeCrashReproducerDirPath();
RemoteMkdir(crash_dir);
std::string save_dir =
std::filesystem::path(crash_dir).append("crashing_batch-").
concat(suspect_hash);
RemoteMkdir(save_dir);
LOG(INFO) << log_prefix << "Saving used inputs from batch to: " << save_dir;
for (int i = 0; i <= suspect_input_idx; ++i) {
const auto &one_input = input_vec[i];
auto hash = Hash(one_input);
std::string file_path = std::filesystem::path(save_dir)
.append(absl::StrFormat("input-%010d-%s", i, hash));
auto *file = RemoteFileOpen(file_path, "w");
CHECK(file != nullptr) << log_prefix << "Failed to open " << file_path;
RemoteFileAppend(file, one_input);
RemoteFileClose(file);
}
}

} // namespace centipede
2 changes: 2 additions & 0 deletions execution_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ class BatchResult {
}

private:
friend class MultiInputMock;

std::vector<ExecutionResult> results_;
std::string log_; // log_ is populated optionally, e.g. if there was a crash.
int exit_code_ = EXIT_SUCCESS; // Process exit code.
Expand Down
109 changes: 109 additions & 0 deletions testing/centipede_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,115 @@ TEST(Centipede, ExtraBinaries) {
Hash({10}), Hash({30}), Hash({50})));
}

namespace {

// A mock for UndetectedCrashingInput test.
class UndetectedCrashingInputMock : public CentipedeCallbacks {
public:
explicit UndetectedCrashingInputMock(const Environment &env,
size_t crashing_input_idx)
: CentipedeCallbacks{env}, crashing_input_idx_{crashing_input_idx} {
CHECK_LE(crashing_input_idx_, std::numeric_limits<uint8_t>::max());
}

// Doesn't execute anything.
// Crash when 0th char of input to binary b1 equals 10, but only on 1st exec.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
BatchResult &batch_result) override {
batch_result.ClearAndResize(inputs.size());
bool res = true;
for (const auto & input : inputs) {
CHECK_EQ(input.size(), 1); // By construction in `Mutate()`.
// The contents of each mutant is its sequantial number.
if (input[0] == crashing_input_idx_) {
if (first_pass_) {
first_pass_ = false;
crashing_input_ = input;
// TODO(b/274705740): `num_outputs_read()` is the number of outputs
// that Centipede engine *expects* to have been read from *the
// current BatchResult* by the *particular* implememtation of
// `CentipedeCallbacks` (and `DefaultCentipedeCallbacks` fits the
// bill). `Centipede::ReportCrash()` then uses this value as a hint
// for the crashing input's index, and in our case saves the batch's
// inputs from 0 up to and including the crasher to a subdir. See the
// bug for details. All of this is horribly convoluted and misplaced
// here. Implement a cleaner solution.
batch_result.num_outputs_read() =
crashing_input_idx_ % env_.batch_size;
res = false;
}
}
}
return res;
}

// Sets the mutants to different 1-byte values.
void Mutate(const std::vector<ByteArray> &inputs, size_t num_mutants,
std::vector<ByteArray> &mutants) override {
mutants.resize(num_mutants);
for (auto &mutant : mutants) {
// The contents of each mutant is simply its sequential number.
mutant = {static_cast<uint8_t>(curr_input_idx_++)};
}
}

// Gets the input that triggered the crash.
ByteArray crashing_input() const { return crashing_input_; }

private:
const size_t crashing_input_idx_;
size_t curr_input_idx_ = 0;
ByteArray crashing_input_ = {};
bool first_pass_ = true;
};

} // namespace

// Test for preserving a crashing batch when 1-by-1 exec fails to reproduce.
// Executes one main binary (--binary).
// Expects the binary to crash once and 1-by-1 reproduction to fail.
TEST(Centipede, UndetectedCrashingInput) {
constexpr size_t kNumBatches = 7;
constexpr size_t kBatchSize = 11;
constexpr size_t kCrashingInputIdxInBatch = kBatchSize / 2;
constexpr size_t kCrashingInputIdx =
(kNumBatches / 2) * kBatchSize + kCrashingInputIdxInBatch;

LOG(INFO) << VV(kNumBatches) << VV(kBatchSize)
<< VV(kCrashingInputIdxInBatch) VV(kCrashingInputIdx);

TempDir temp_dir{test_info_->name()};
Environment env;
env.workdir = temp_dir.path();
env.num_runs = kBatchSize * kNumBatches;
env.batch_size = kBatchSize;
// No real binary: prevent attempts by Centipede to read a PCtable from it.
env.require_pc_table = false;

UndetectedCrashingInputMock mock(env, kCrashingInputIdx);
MockFactory factory(mock);
CentipedeMain(env, factory);

// Verify that we see the expected inputs from the batch.
// The "crashes/unreliable_batch-<HASH>" dir must contain all inputs from the
// batch that were executing during the session.
// We simply verify the number of saved inputs matches the number of executed
// inputs.
const auto crashing_input_hash = Hash(mock.crashing_input());
const auto crashes_dir_path = std::filesystem::path(temp_dir.path())
.append("crashes")
.append("crashing_batch-")
.concat(crashing_input_hash);
ASSERT_TRUE(std::filesystem::exists(crashes_dir_path)) << crashes_dir_path;
std::vector<std::string> found_crash_file_names;
for (auto const &dir_ent :
std::filesystem::directory_iterator(crashes_dir_path)) {
found_crash_file_names.push_back(dir_ent.path().filename());
}
// TODO(ussuri): Verify exact names/contents of the files, not just count.
ASSERT_EQ(found_crash_file_names.size(), kCrashingInputIdxInBatch + 1);
}

static void WriteBlobsToFile(const std::vector<ByteArray> &blobs,
const std::string_view path) {
auto appender = DefaultBlobFileAppenderFactory();
Expand Down

0 comments on commit b1e8503

Please sign in to comment.