-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/JetBrains-Research/verified…
- Loading branch information
Showing
6 changed files
with
127 additions
and
1 deletion.
There are no files selected for viewing
5 changes: 5 additions & 0 deletions
5
prompts/dafny_eval_comment_without_impls_textd/ask_for_fixed.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
This answer got Dafny verification error: | ||
{error} | ||
Please try again by taking the Dafny feedback. | ||
|
||
If an error appears in one of method ending with `_valid`, that means annotations of an existing function were altered. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Given a Dafny program with function signature, preconditions and postconditions, but with some implementations and annotations missing. | ||
Please return a complete Dafny program with all functions implemented, and annotated using the strongest possible annotations (loop invariants, assert statements, etc.). | ||
Use extensive comments in the code, but don't explain anything outside of it. | ||
Please use exactly the same function signature, preconditions, and postconditions. Do not ever modify the given lines. | ||
Below is the program: | ||
{program} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
You are an expert in Dafny. | ||
You will be given tasks dealing with Dafny programs including precise annotations. | ||
You respond only with code blocks. | ||
You will write a comment explaining each significant point of the program on a line before. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# %% | ||
import matplotlib.pyplot as plt | ||
import json | ||
from itertools import accumulate | ||
import pathlib | ||
import seaborn as sns | ||
import pandas as pd | ||
|
||
# %% | ||
|
||
|
||
def read_result(file): | ||
with open(file) as f: | ||
return {name[:3]: t if t >= 0 else 99 for name, t in json.load(f).items()} | ||
|
||
|
||
# %% | ||
|
||
|
||
def plot_results_heatmap(results_dict, width=35): | ||
df = pd.DataFrame.from_dict({ | ||
source: entry_dict | ||
for source, entry_dict in results_dict.items() | ||
}, orient='index') | ||
|
||
# Create the heatmap | ||
plt.figure(figsize=(width, 4)) | ||
sns.heatmap( | ||
df, | ||
cmap='YlOrRd', # Color scheme - you can change this | ||
annot=True, # Show numbers in cells | ||
fmt='d', # Format as integers | ||
cbar_kws={'label': 'Result'} | ||
) | ||
|
||
plt.title('Results Heatmap') | ||
plt.xlabel('Entries') | ||
plt.ylabel('Sources') | ||
plt.tight_layout() | ||
|
||
|
||
# %% | ||
data = pathlib.Path('heatmap_data') | ||
results = {str(x.name).removesuffix(".json"): read_result(x) for x in sorted(data.glob('*.json'))} | ||
# %% | ||
plot_results_heatmap(results, width=33) | ||
plt.show() | ||
|
||
# %% | ||
all_tasks = list(next(iter(results.values())).keys()) | ||
solve_count = {t: sum(r[t] <= 10 for r in results.values()) for t in all_tasks} | ||
# %% | ||
nontrivial_tasks = [t for t in all_tasks if 0 < solve_count[t] < len(results)] | ||
nontrivial_results = {s: {t: r[t] for t in nontrivial_tasks} for s, r in results.items()} | ||
variant_tasks = [t for t in all_tasks if 2 <= solve_count[t] <= len(results) - 2] | ||
variant_results = {s: {t: r[t] for t in variant_tasks} for s, r in results.items()} | ||
# %% | ||
plot_results_heatmap(nontrivial_results, width=19) | ||
plt.show() | ||
# %% | ||
plot_results_heatmap(variant_results, width=10) | ||
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# %% | ||
import matplotlib.pyplot as plt | ||
import json | ||
from itertools import accumulate | ||
import pathlib | ||
|
||
# %% | ||
|
||
|
||
def read_bench(d, ext): | ||
bench = pathlib.Path(d) | ||
return [x.name.removesuffix(f'.{ext}') for x in bench.glob(f'*.{ext}')] | ||
|
||
|
||
# %% | ||
|
||
intersect_bench = True | ||
|
||
path = "results/dafny_4_comments.json" | ||
dafny_bench = pathlib.Path("benches/HumanEval-Dafny") | ||
nagini_bench = pathlib.Path("benches/HumanEval-Nagini/Bench") | ||
dafny_files = read_bench("benches/HumanEval-Dafny", "dfy") | ||
nagini_files = read_bench("benches/HumanEval-Nagini/Bench", "py") | ||
shared_files = [x for x in dafny_files if x in nagini_files] | ||
files = shared_files if intersect_bench else dafny_files | ||
# %% | ||
file_cnt = len(files) | ||
with open(path) as f: | ||
data = json.load(f) | ||
|
||
tries = [value for file, value in data.items() if file.removesuffix('.dfy') in files and value != -1] | ||
max_tries = max(tries) | ||
cnt = [0] * (max_tries + 1) | ||
for t in tries: | ||
cnt[t] += 1 | ||
cnt = list(accumulate(cnt)) | ||
perc = [100 * (c / file_cnt) for c in cnt] | ||
|
||
fig, ax = plt.subplots() # type: ignore | ||
plt.title("Percentage of files verified") # type: ignore | ||
plt.xlabel("Number of tries") # type: ignore | ||
plt.ylabel("Percentage of files") # type: ignore | ||
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"{x}%")) # type: ignore | ||
ax.plot(perc) # type: ignore | ||
# %% | ||
plt.show() | ||
# %% | ||
print(f"{cnt[-1]} / {file_cnt}") | ||
print(f"{round(perc[-1],2)}% of the files were successfully verified") |