Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add options to detect brittleness across multiple runs and portfolio results. #790

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Source/Core/CoreOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public enum VerbosityLevel
SubsumptionOption UseSubsumption { get; }
int VcsCores { get; }
int RandomizeVcIterations { get; }
bool PortfolioVcIterations { get; }
int PortfolioVcBatchSize { get; }
List<string> ProverOptions { get; }
bool Prune { get; }
bool RunDiagnosticsOnTimeout { get; }
Expand Down
31 changes: 30 additions & 1 deletion Source/ExecutionEngine/CommandLineOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ public bool InstrumentWithAsserts

public int RandomizeVcIterations { get; set; } = 1;

public bool PortfolioVcIterations { get; set; } = false;
public int PortfolioVcBatchSize { get; set; } = 1;

public bool PrintWithUniqueASTIds {
get => printWithUniqueAstIds;
set => printWithUniqueAstIds = value;
Expand Down Expand Up @@ -1185,6 +1188,16 @@ protected override bool ParseOption(string name, CommandLineParseState ps)
RandomSeed ??= 0; // Set to 0 if not already set
return true;

case "portfolioVcIterations":
ps.GetIntArgument(x => RandomizeVcIterations = x, a => 1 <= a);
PortfolioVcIterations = true;
RandomSeed ??= 0; // Set to 0 if not already set
return true;

case "portfolioVcBatchSize":
ps.GetIntArgument(x => PortfolioVcBatchSize = x, a => 1 <= a);
return true;

case "vcsLoad":
double load = 0.0;
if (ps.GetDoubleArgument(x => load = x))
Expand Down Expand Up @@ -1884,7 +1897,8 @@ to affect the verification of any other part of the program.
a linear number of splits. The default way (top-down) is more
aggressive and it may create an exponential number of splits.
/randomSeed:<s>
Supply the random seed for /randomizeVcIterations option.
Supply the random seed for /randomizeVcIterations and
/portfolioVcIterations options.
/randomizeVcIterations:<n>
Turn on randomization of the input that Boogie passes to the
SMT solver and turn on randomization in the SMT solver itself.
Expand All @@ -1899,6 +1913,21 @@ without requiring the user to actually make those changes.
This option is implemented by renaming variables and reordering
declarations in the input, and by setting solver options that have
similar effects.
/portfolioVcIterations:<n>
Enables /randomizeVcIterations and returns the first successful
proof result (if any) out of n randomized VCs.
/portfolioVcBatchSize:<m>
Splits n in /portfolioVcIterations:<n> into n/m batches (or
n/m+1 batches if n is not divisible by m).
Defaults to 1.

Iterations in a batch can be executed concurrently, but each
batch is executed sequentially to completion. A batch execution
is considered complete when all iterations in that batch
complete execution. If any of the iterations in a batch returns
a valid verification outcome, remaining batches are cancelled.
Since the iterations and batches are created deterministically,
this implies determinism when using /portfolioVcIterations.
/trackVerificationCoverage
Track and report which program elements labeled with an
`{:id ...}` attribute were necessary to complete verification.
Expand Down
12 changes: 9 additions & 3 deletions Source/VCGeneration/Split.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1269,15 +1269,21 @@ public int GetHashCode(List<Block> obj)
}
}

public async Task<(ProverInterface.Outcome outcome, VCResult result, int resourceCount)> ReadOutcome(int iteration, Checker checker, VerifierCallback callback)
public async Task<(ProverInterface.Outcome outcome, VCResult result, int resourceCount)>
ReadOutcome(int iteration, int batch, Checker checker, VerifierCallback callback)
{
Contract.EnsuresOnThrow<UnexpectedProverOutputException>(true);
ProverInterface.Outcome outcome = cce.NonNull(checker).ReadOutcome();

if (options.Trace && SplitIndex >= 0)
{
run.OutputWriter.WriteLine(" --> split #{0} done, [{1} s] {2}", SplitIndex + 1,
checker.ProverRunTime.TotalSeconds, outcome);
var iterationText = options.RandomizeVcIterations > 1
? batch >= 0
? $" (batch {batch} iteration {iteration}) "
: $" (iteration {iteration}) "
: " ";
run.OutputWriter.WriteLine(" --> split #{0}{3}done, [{1} s] {2}", SplitIndex + 1,
checker.ProverRunTime.TotalSeconds, outcome, iterationText);
}
if (options.Trace && options.TrackVerificationCoverage) {
run.OutputWriter.WriteLine("Proof dependencies:\n {0}",
Expand Down
190 changes: 161 additions & 29 deletions Source/VCGeneration/SplitAndVerifyWorker.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using System.Reactive.Subjects;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Boogie;
Expand Down Expand Up @@ -112,37 +112,107 @@ private void TrackSplitsCost(List<Split> splits)
}
}

async Task DoWorkForMultipleIterations(Split split, CancellationToken cancellationToken)
private async Task DoWorkForMultipleIterations(Split split, CancellationToken cancellationToken)
{
int currentSplitNumber = DoSplitting ? Interlocked.Increment(ref splitNumber) - 1 : -1;
var currentSplitNumber = DoSplitting ? Interlocked.Increment(ref splitNumber) - 1 : -1;
split.SplitIndex = currentSplitNumber;
var tasks = Enumerable.Range(0, options.RandomizeVcIterations).Select(iteration =>
DoWork(iteration, split, cancellationToken));
await Task.WhenAll(tasks);
if (!options.PortfolioVcIterations)
{
await Task.WhenAll(Enumerable.Range(0, options.RandomizeVcIterations).Select(iteration =>
DoWork(iteration, -1, split, cancellationToken)));
return;
}

var totalIterations = options.RandomizeVcIterations;
var batchSize = options.PortfolioVcBatchSize;
var numBatches = totalIterations / batchSize;
var remainder = totalIterations % batchSize;
if (remainder > 0)
{
numBatches++;
}

if (options.Trace)
{
var splitText = split.SplitIndex == -1 ? "" : $"split #{split.SplitIndex+1}: ";
await run.OutputWriter.WriteLineAsync($" {splitText}Attempting verification in {numBatches} batches of size {batchSize}.");
}

bool successfulProofFound = false;
var allTaskResults = new List<(ProverInterface.Outcome outcome, VCResult result)>();

for (var batch = 0; batch < numBatches && !successfulProofFound; ++batch)
{
var currentBatchSize = (batch == numBatches - 1 && remainder > 0) ? remainder : batchSize;

var batchCopy = batch;
var taskResults = await Task.WhenAll(Enumerable.Range(0, currentBatchSize).Select(iteration =>
DoWork(iteration, batchCopy, split, cancellationToken)));
taskResults.ForEach(res => allTaskResults.Add(res));

successfulProofFound = taskResults.Any(pair => pair.outcome == ProverInterface.Outcome.Valid);
if (successfulProofFound)
{
if (options.Trace)
{
var remainingBatchesText =
batch < numBatches - 1 ? $" Not executing remaining {numBatches - batch - 1} batches." : "";
var splitText = split.SplitIndex == -1 ? "" : $"split #{split.SplitIndex+1}: ";
await run.OutputWriter.WriteLineAsync($" {splitText}Batch {batch} verified.{remainingBatchesText}");
}
} else if (batch == numBatches - 1) {
// This is the last batch and it was not successful.
outcome = taskResults.Select(pair => pair.Item1).Aggregate(outcome, MergeOutcomes);
if (options.Trace)
{
var splitText = split.SplitIndex == -1 ? "" : $"split #{split.SplitIndex+1}: ";
await run.OutputWriter.WriteLineAsync($" {splitText}All batches failed to verify.");
}
} else {
// This is not the last batch and it was not successful.
if (options.Trace)
{
var splitText = split.SplitIndex == -1 ? "" : $"split #{split.SplitIndex+1}: ";
await run.OutputWriter.WriteLineAsync($" {splitText}batch {batch} failed to verify. moving on to batch {batch + 1}....");
}
}
}
if (outcome == Outcome.TimedOut || outcome == Outcome.OutOfMemory || outcome == Outcome.OutOfResource) {
ReportSplitErrorResult(split);
}
if (outcome == Outcome.Correct)
{
await CheckAndReportBrittleness(allTaskResults, split, 20, 30, 5, 1000000);
}
}

async Task DoWork(int iteration, Split split, CancellationToken cancellationToken)
private async Task<(ProverInterface.Outcome outcome, VCResult result)>
DoWork(int iteration, int batch, Split split, CancellationToken cancellationToken)
{
var checker = await split.parent.CheckerPool.FindCheckerFor(split.parent, split, cancellationToken);

try {
cancellationToken.ThrowIfCancellationRequested();
await StartCheck(iteration, split, checker, cancellationToken);
await StartCheck(iteration, batch, split, checker, cancellationToken);
await checker.ProverTask;
await ProcessResultAndReleaseChecker(iteration, split, checker, cancellationToken);
var (newOutcome, result, newResourceCount) = await split.ReadOutcome(iteration, batch, checker, callback);
var cex = IsProverFailed(newOutcome) ? split.ToCounterexample(checker.TheoremProver.Context) : null;
TotalProverElapsedTime += checker.ProverRunTime;
await checker.GoBackToIdle();
await ProcessResult(split, newOutcome, result, newResourceCount, cex, cancellationToken);
return (newOutcome, result);
}
catch {
await checker.GoBackToIdle();
throw;
}
}

private async Task StartCheck(int iteration, Split split, Checker checker, CancellationToken cancellationToken)
private async Task StartCheck(int iteration, int batch, Split split, Checker checker, CancellationToken cancellationToken)
{
if (options.Trace && DoSplitting) {
var splitNum = split.SplitIndex + 1;
var splitIdxStr = options.RandomizeVcIterations > 1 ? $"{splitNum} (iteration {iteration})" : $"{splitNum}";
var iterationIdxStr = options.RandomizeVcIterations > 1 ? batch >= 0 ? $" (batch {batch} iteration {iteration})" : $" (iteration {iteration})" : "";
var splitIdxStr = $"{splitNum}{iterationIdxStr}";
run.OutputWriter.WriteLine(" checking split {1}/{2}, {3:0.00}%, {0} ...",
split.Stats, splitIdxStr, total, 100 * provenCost / (provenCost + remainingCost));
}
Expand All @@ -158,20 +228,22 @@ private async Task StartCheck(int iteration, Split split, Checker checker, Cance

private Implementation Implementation => run.Implementation;

private async Task ProcessResultAndReleaseChecker(int iteration, Split split, Checker checker, CancellationToken cancellationToken)
private async Task ProcessResult(Split split, ProverInterface.Outcome checkerOutcome,
VCResult result, int checkerResourceCount, Counterexample checkerCex, CancellationToken cancellationToken)
{
if (TrackingProgress) {
lock (this) {
remainingCost -= split.Cost;
}
}

var (newOutcome, result, newResourceCount) = await split.ReadOutcome(iteration, checker, callback);
lock (this) {
outcome = MergeOutcomes(outcome, newOutcome);
totalResourceCount += newResourceCount;
if (!options.PortfolioVcIterations) {
outcome = MergeOutcomes(outcome, checkerOutcome);
}
totalResourceCount += checkerResourceCount;
}
var proverFailed = IsProverFailed(newOutcome);
var proverFailed = IsProverFailed(checkerOutcome);

if (TrackingProgress) {
lock (this) {
Expand All @@ -188,11 +260,9 @@ private async Task ProcessResultAndReleaseChecker(int iteration, Split split, Ch
callback.OnProgress?.Invoke("VCprove", splitNumber < 0 ? 0 : splitNumber, total, provenCost / (remainingCost + provenCost));

if (proverFailed) {
await HandleProverFailure(split, checker, callback, result, cancellationToken);
} else {
batchCompletions.OnNext((split, result));
await checker.GoBackToIdle();
await HandleProverFailure(split, checkerCex, callback, result, cancellationToken);
}
batchCompletions.OnNext((split, result));
}

private static bool IsProverFailed(ProverInterface.Outcome outcome)
Expand Down Expand Up @@ -254,15 +324,14 @@ private static Outcome MergeOutcomes(Outcome currentOutcome, ProverInterface.Out
}
}

private async Task HandleProverFailure(Split split, Checker checker, VerifierCallback callback, VCResult vcResult, CancellationToken cancellationToken)
private async Task HandleProverFailure(Split split, Counterexample cex, VerifierCallback callback, VCResult vcResult, CancellationToken cancellationToken)
{
if (split.LastChance) {
if (split.LastChance && !options.PortfolioVcIterations) {
string msg = "some timeout";
if (split.reporter is { resourceExceededMessage: { } }) {
msg = split.reporter.resourceExceededMessage;
}

var cex = split.ToCounterexample(checker.TheoremProver.Context);
callback.OnCounterexample(cex, msg);
split.Counterexamples.Add(cex);
// Update one last time the result with the dummy counter-example to indicate the position of the timeout
Expand All @@ -271,12 +340,9 @@ private async Task HandleProverFailure(Split split, Checker checker, VerifierCal
};
batchCompletions.OnNext((split, result));
outcome = Outcome.Errors;
await checker.GoBackToIdle();
return;
}

await checker.GoBackToIdle();

if (maxKeepGoingSplits > 1) {
var newSplits = Split.DoSplit(split, maxVcCost, maxKeepGoingSplits);
if (options.Trace) {
Expand All @@ -293,6 +359,15 @@ private async Task HandleProverFailure(Split split, Checker checker, VerifierCal
return;
}

if (options.PortfolioVcIterations) {
// Errors are handled later when using PortfolioVcIterations.
return;
}
ReportSplitErrorResult(split);
}

private void ReportSplitErrorResult(Split split)
{
Contract.Assert(outcome != Outcome.Correct);
if (outcome == Outcome.TimedOut) {
string msg = "some timeout";
Expand All @@ -313,11 +388,68 @@ private async Task HandleProverFailure(Split split, Checker checker, VerifierCal
if (split.reporter is { resourceExceededMessage: { } }) {
msg = split.reporter.resourceExceededMessage;
}

callback.OnOutOfResource(msg);
}
}

private async Task CheckAndReportBrittleness(List<(ProverInterface.Outcome outcome, VCResult result)> taskResults,
Split split, int runTimeCvvhreshold, int resourceCountCvThreshold, int runTimeMeanThreshold, int resourceCountMeanThreshold)
{
var numVerified = taskResults.Count(pair => pair.outcome == ProverInterface.Outcome.Valid);
var numTotalGoals = taskResults.Count;
var numFailed = numTotalGoals - numVerified;
var successRatio = (double) numVerified / numTotalGoals;

// Analyze variances in runTime and resourceCount
double runTimeSum = 0;
double resourceCountSum = 0;
var taskCount = taskResults.Count;

foreach (var (_, result) in taskResults)
{
runTimeSum += result.runTime.TotalSeconds;
resourceCountSum += result.resourceCount;
}

batchCompletions.OnNext((split, vcResult));
var runTimeMean = runTimeSum / taskCount;
var resourceCountMean = resourceCountSum / taskCount;

double runTimeVarianceSum = 0;
double resourceCountVarianceSum = 0;

foreach (var (_, result) in taskResults)
{
runTimeVarianceSum += Math.Pow(result.runTime.TotalSeconds - runTimeMean, 2);
resourceCountVarianceSum += Math.Pow(result.resourceCount - resourceCountMean, 2);
}

var runTimeStdDev = Math.Sqrt(runTimeVarianceSum / (taskCount - 1));
var resourceCountStdDev = Math.Sqrt(resourceCountVarianceSum / (taskCount - 1));

var runTimeCv = (runTimeMean == 0) ? 0 : (runTimeStdDev / runTimeMean) * 100;
var resourceCountCv = (resourceCountMean == 0) ? 0 : (resourceCountStdDev / resourceCountMean) * 100;

// Output results
var splitText = split.SplitIndex == -1 ? "" : $"split #{split.SplitIndex+1}: ";
var sb = new StringBuilder();

if (numFailed > 0)
{
sb.AppendLine($" {splitText}Warning: goal is brittle! Ratio of verified / total goals: {numVerified}/{numTotalGoals} ({successRatio:F2})");
}
if (runTimeMean > runTimeMeanThreshold && runTimeCv > runTimeCvvhreshold)
{
var runTimesList = string.Join(", ", taskResults.Select(t => t.result.runTime.TotalSeconds.ToString("F2")));
sb.AppendLine($" Warning: {splitText}Exceeded coefficient of variation for runtimes. Coefficient of Variation: {runTimeCv:F2}%");
sb.AppendLine($" Runtimes (seconds): {runTimesList}");
}
if (resourceCountMean > resourceCountMeanThreshold && resourceCountCv > resourceCountCvThreshold)
{
var resourceCountsList = string.Join(", ", taskResults.Select(t => t.result.resourceCount));
sb.AppendLine($" Warning: {splitText}Exceeded coefficient of variation for resource counts. Coefficient of Variation: {resourceCountCv:F2}%");
sb.AppendLine($" Resource Counts: {resourceCountsList}");
}
await run.OutputWriter.WriteAsync(sb.ToString());
}
}
}
Loading