Merge pull request #8 from JetBrains-Research/ci-fix-bechmark

Ci fix bechmark

galtimur authored Jun 5, 2024
2 parents 99b54ca + 16b6869 commit 6d841da
Showing 10 changed files with 671 additions and 87 deletions.
58 changes: 58 additions & 0 deletions ci-fixing/ci-fixing-benchmark/README.md
@@ -0,0 +1,58 @@
## Config

To initialize the benchmark, you need to pass the path to a config file with the following fields (see the example in `benchmark.yaml`):

**repos_folder**: the folder where the cloned repos will be stored
**out_folder**: the folder where the result files will be stored
**data_cache_dir**: the folder where the cached dataset will be stored
**username_gh**: your GitHub username
**test_username**: optional. The username that will be displayed in the benchmark. If omitted, `username_gh` is used, which is how we prefer it.
**language**: the dataset language (currently only Python is available)

## Benchmark usage

For an example of how to use the benchmark, see the `run_benchmark.py` script.

To use the benchmark, you need to pass a function (`fix_repo_function`) that fixes the repo according to
the repo state on a local machine and the logs and metadata of the failed workflows.

It should have the following (all optional) arguments:
(datapoint, repo_path, repo, out_folder)

**datapoint**: a datapoint from the dataset (its structure is described below)
**repo_path**: path to the repo on the user's machine
**repo**: a git.Repo object from the GitPython library
**out_folder**: folder for the benchmark results output

For now, only two functions have been implemented (a sketch of a custom function follows the list):

`fix_none` - does nothing
`fix_apply_diff` - applies the diff that fixed the issue in the original repo
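
If you want to write your own, here is a minimal sketch of a custom `fix_repo_function`; the body is a placeholder for illustration, and the exact fields of `datapoint` are an assumption:

```python
import git  # GitPython


def fix_repo_trivial(datapoint, repo_path, repo: git.Repo, out_folder):
    # Placeholder fix: log which datapoint is being processed and leave
    # the repo untouched. A real implementation would edit files under
    # `repo_path` based on the failed-workflow logs carried by `datapoint`.
    print(f"Fixing datapoint {datapoint.get('id', '<unknown>')} in {repo_path}")
```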

## Evaluate dataset

The method `CIFixBenchmark.eval_dataset(fix_repo_function)` evaluates the dataset:

1. Downloads the dataset (from https://huggingface.co/datasets/JetBrains-Research/lca-ci-fixing)
2. Sends the datapoints to GitHub to run the workflows
3. Requests the results from GitHub
4. Analyzes the results and prints them.

In the future, we may duplicate the request part on our side.
For debugging, please limit yourself to a small number of datapoints (argument `num_dp=num_dp`); see the sketch below.
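
A minimal usage sketch, assuming the GitHub token is stored in a local text file and that `fix_apply_diff` is importable from `benhmark_functions` (both are assumptions; adjust to your setup):

```python
from benchmark import CIFixBenchmark
from benhmark_functions import fix_apply_diff  # assumed import location

# Assumption: your GitHub token sits in a local text file.
with open("token_gh.txt") as f:
    token_gh = f.read().strip()

bench = CIFixBenchmark(model_name="diff", config_path="benchmark.yaml", token_gh=token_gh)
bench.eval_dataset(fix_apply_diff, num_dp=5)  # keep num_dp small while debugging
```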

## Outputs

The method's outputs:

1. `jobs_ids.jsonl` - identifiers of the jobs that were sent to GitHub; used for the further evaluation.
2. `jobs_results.jsonl` - results of each job.
3. `jobs_awaiting.jsonl` - list of awaiting jobs (normally should be empty).
4. `jobs_invalid.jsonl` - list of invalid jobs (normally should be empty).

Examples of these files can be found in the `/examples` folder.

You can also evaluate your results using the method `CIFixBenchmark.eval_jobs(result_filename=result_filename)`,
passing the `jobs_ids.jsonl` file.

You can download the dataset using the `CIFixBenchmark.get_dataset()` method (an example is at the end of the file). A short sketch of both calls follows.
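
Continuing the sketch above, both calls might look like this (the file paths are assumptions based on the default `jobs_ids_{model_name}.jsonl` naming):

```python
# Re-evaluate previously submitted jobs from the saved identifiers file.
results = bench.eval_jobs(
    job_ids_file="out/jobs_ids_diff.jsonl",
    result_filename="jobs_results_diff.jsonl",
)

# Download (or load from cache) the dataset without running anything.
dataset = bench.get_dataset(num_dp=10)
```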
112 changes: 83 additions & 29 deletions ci-fixing/ci-fixing-benchmark/benchmark.py
@@ -1,77 +1,102 @@
import json
import os
import time

import pandas as pd
from datasets import load_dataset
from omegaconf import OmegaConf
from tqdm import tqdm
from typing import List

from benchmark_utils import read_jsonl, save_jsonl
from benhmark_functions import get_results, process_datapoint


def filter_files(directory, files):
return [file for file in files if file != "meta_info.json"]

def filter_by_id(example, ids):
return example['id'] in ids

class CIFixBenchmark:
def __init__(self, model_name, config_path, token_gh):
# languages = ["Python", "Kotlin", "Rust", "C++", "Java"]
benchmark_owner = "LCA-CI-fix-benchmark"
self.config = OmegaConf.load(config_path)
if not "test_username" in self.config:
self.config.test_username = self.config.username_gh
language = self.config.language
self.credentials = {"username": self.config.username, "token": token_gh, "model": model_name}
# TODO parents=True (??)
self.credentials = {
"username": self.config.username_gh,
"token": token_gh,
"model": model_name,
}

os.makedirs(self.config.out_folder, exist_ok=True)
os.makedirs(self.config.repos_folder, exist_ok=True)
self.dataset_id = f"JetBrains-Research/lca-ci-fixing"
OmegaConf.update(self.config, "benchmark_owner", benchmark_owner, force_add=True)
OmegaConf.update(
self.config, "benchmark_owner", benchmark_owner, force_add=True
)
if hasattr(self.config, "data_cache_dir"):
self.cache_dir = self.config.data_cache_dir
else:
self.cache_dir = None
self.model_name = model_name

def get_dataset(
self, num_dp=None, force_download=False, dataset_folder=None
):

if dataset_folder is not None:
self.dataset = load_dataset(path=dataset_folder)["train"]
# TODO: needs refactoring
if num_dp is not None:
self.dataset = self.dataset.select(range(num_dp))
return self.dataset
if force_download:
download_mode = "force_redownload"
else:
download_mode = None
self.dataset = load_dataset(
self.dataset_id,
cache_dir=self.cache_dir,
download_mode=download_mode,
split="test"
)
if num_dp is not None:
self.dataset = self.dataset.select(range(num_dp))

return self.dataset

# TODO remove test_dataset argument after debug
def run_dataset(self, fix_repo_function, test_dataset=None):
if test_dataset is None:
test_dataset = self.dataset
self.jobs_ids = []
jobs_ids_file_path = os.path.join(
self.config.out_folder, f"jobs_ids_{self.model_name}.jsonl"
)
with open(jobs_ids_file_path, "w") as writer:
for datapoint in tqdm(test_dataset):
job_identificator = process_datapoint(
datapoint, fix_repo_function, self.config, self.credentials
)
self.jobs_ids.append(job_identificator)
json.dump(job_identificator, writer)
writer.write("\n")
return self.jobs_ids

# TODO remove jobs_ids argument after debug
def eval_jobs(self, jobs_ids=None, job_ids_file=None, result_filename=None):
if result_filename is None:
result_filename = f"jobs_results_{self.model_name}.jsonl"
# Maybe we need to make some pause
jobs_results_file_path = os.path.join(self.config.out_folder, result_filename)
jobs_awaiting_file_path = os.path.join(
self.config.out_folder, f"jobs_awaiting_{self.model_name}.jsonl"
)
jobs_invalid_file_path = os.path.join(
self.config.out_folder, f"jobs_invalid_{self.model_name}.jsonl"
)
result_file = open(jobs_results_file_path, "w")
if job_ids_file is not None:
jobs_ids = read_jsonl(job_ids_file)
@@ -102,8 +127,10 @@ def eval_jobs(self, jobs_ids=None, job_ids_file=None, result_filename=None):
result_file.close()
save_jsonl(jobs_awaiting_file_path, jobs_ids_await)
save_jsonl(jobs_invalid_file_path, jobs_ids_invalid)
print(f"Waiting 300 s to next request of evaluation. {len(jobs_ids_await)} jobs in waiting list.")
time.sleep(300)
print(
f"Waiting 360 s to next request of evaluation. {len(jobs_ids_await)} jobs in waiting list."
)
time.sleep(360)
result_file = open(jobs_results_file_path, "a")

n_attempts += 1
@@ -116,17 +143,34 @@ def eval_jobs(self, jobs_ids=None, job_ids_file=None, result_filename=None):
self.jobs_results = jobs_results
return jobs_results

def get_results(self, job_ids_file=None, result_filename=None):

if job_ids_file is None:
job_ids_file = os.path.join(
self.config.out_folder, f"jobs_ids_{self.model_name}.jsonl"
)

self.eval_jobs(job_ids_file=job_ids_file, result_filename=result_filename)
if result_filename is None:
result_filename = f"jobs_results_{self.model_name}.jsonl"
result_file = os.path.join(self.config.out_folder, result_filename)
self.analyze_results(jobs_results_file=result_file)

def analyze_results(self, jobs_results=None, jobs_results_file=None):
if jobs_results_file is not None:
jobs_results = read_jsonl(jobs_results_file)
if jobs_results is None:
jobs_results = self.jobs_ids

results_df = pd.DataFrame(jobs_results)
# %%
total_counts = results_df["conclusion"].value_counts()
total_ratio = total_counts / len(results_df)
difficulty_counts = (
results_df.groupby("difficulty")["conclusion"]
.value_counts()
.unstack()
.fillna(0)
)
difficulty_ratios = difficulty_counts.div(difficulty_counts.sum(axis=1), axis=0)

print("Overall results")
@@ -141,14 +185,20 @@ def analyze_results(self, jobs_results=None, jobs_results_file=None):
def eval_dataset(
self,
fix_repo_function,
num_dp: int = None,
ids_list: List = None,
force_download=False,
result_filename=None,
dataset_folder=None,
):
print("---------------- Downloading data -------------------")
self.get_dataset(
num_dp=num_dp,
force_download=force_download,
dataset_folder=dataset_folder,
)
if ids_list is not None:
self.dataset = self.dataset.filter(lambda example: filter_by_id(example, ids_list))
print(f"Got {len(self.dataset)} datapoints")
print("---------------- Running datapoints -------------------")
self.run_dataset(fix_repo_function)
@@ -158,9 +208,13 @@ def eval_dataset(

def run_datapoint(self, datapoint, fix_repo_function):
# This method exists for debugging purposes
jobs_ids_file_path = os.path.join(
self.config.out_folder, f"jobs_ids_{self.model_name}.jsonl"
)
with open(jobs_ids_file_path, "w") as writer:
job_identificator = process_datapoint(
datapoint, fix_repo_function, self.config, self.credentials
)
json.dump(job_identificator, writer)
writer.write("\n")
return job_identificator
4 changes: 2 additions & 2 deletions ci-fixing/ci-fixing-benchmark/benchmark.yaml
@@ -1,6 +1,6 @@
repos_folder: /mnt/data/shared-data/lca/CI-fix-benchmark/repos # the cloned repos will be stored here
out_folder: /mnt/data/galimzyanov/data/LCA/benchmark/out # the result files will be stored here
data_cache_dir: /mnt/data/galimzyanov/data/LCA/temp # the cached dataset will be stored here
username_gh: timur-for-test # your GitHub username
# test_username: test_user # username that will be displayed in the benchmark. Optional. If omitted, username_gh will be used
language: Python # dataset language (now only Python is available)
33 changes: 12 additions & 21 deletions ci-fixing/ci-fixing-benchmark/benchmark_utils.py
@@ -1,7 +1,6 @@
import json
import os
import shutil


def read_jsonl(file_path):
@@ -11,29 +10,13 @@ def read_jsonl(file_path):
data.append(json.loads(line))
return data


def save_jsonl(file_path, data):
with open(file_path, "w") as f:
for entry in data:
json.dump(entry, f)
f.write("\n")


def filter_out_res(data_folder, out_folder):
"""
@@ -46,8 +29,16 @@ def filter_out_res(data_folder, out_folder):
orig_path = os.path.join(data_folder, "datapoints_json_verified")
filtered_path = os.path.join(data_folder, "datapoints_json_filtered")
os.makedirs(filtered_path, exist_ok=True)
original_sha = {
result["sha_original"][:7]
for result in results_none
if result["conclusion"] == "failure"
}
fixed_sha = {
result["sha_original"][:7]
for result in results_diff
if result["conclusion"] == "success"
}

sha_valid = original_sha.intersection(fixed_sha)

