Skip to content

Commit

Permalink
make the code "flake8 compliant"
Browse files Browse the repository at this point in the history
  • Loading branch information
mrigankpawagi committed Dec 2, 2024
1 parent e36769c commit f733d63
Show file tree
Hide file tree
Showing 16 changed files with 7,363 additions and 182 deletions.
7,187 changes: 7,142 additions & 45 deletions irt/binary_matrix.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions irt/eval_analyse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

# Any other model's data can be added too.


def diff_ques():
"""
Prints the problems which have different answers in the English and Hinglish prompts.
Expand Down
38 changes: 28 additions & 10 deletions irt/irt_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@

current_dir = os.path.dirname(os.path.realpath(__file__))


def compute_irt_params(irt_data: list):
    """
    Compute the Item (problem) and User (model) parameters using the IRT approach.

    :param irt_data: list : A list of tuples containing the model name, problem ID,
        and the binary response of the model to the problem.
    :return: tuple : (item_param, user_param) as produced by ``irt``.
    """
    # Thin wrapper: delegates entirely to the imported `irt` solver so callers
    # depend on this module's API rather than the IRT library directly.
    return irt(irt_data)


def write_csv(file_path, data):
"""
Write data to a CSV file.
Expand All @@ -27,6 +29,7 @@ def write_csv(file_path, data):
writer = csv.writer(file)
writer.writerows(data)


def process_irt_data(lang, irt_matrix, sanitized_code_dir):
"""
Process IRT data for a given language by extracting responses from models.
Expand All @@ -41,15 +44,16 @@ def process_irt_data(lang, irt_matrix, sanitized_code_dir):

for model in os.listdir(sanitized_code_dir):
model = model.replace(".zip", "") if model.endswith(".zip") else model
if model in ["codegen2_1B", "santacoder"]: # Skip these 2 models.
if model in ["codegen2_1B", "santacoder"]: # Skip these 2 models.
continue
models.append(model)
for pid in range(164):
response = irt_matrix[model][pid]
data.append((model, pid, response))

return data


def save_irt_results(output_name, item_param, user_param):
"""
Save IRT results (user parameters and item parameters) to CSV files.
Expand All @@ -66,48 +70,62 @@ def save_irt_results(output_name, item_param, user_param):
user_csv_data = [["MODEL", "VALUE"]]
user_csv_data += sorted(
[[model, f"{value:.3f}"] for model, value in user_param.items()],
key=lambda x: float(x[1])
key=lambda x: float(x[1]),
)
write_csv(
os.path.join(output_dir, f"{output_name}_irt_userparams.csv"), user_csv_data
)
write_csv(os.path.join(output_dir, f"{output_name}_irt_userparams.csv"), user_csv_data)

# Save item parameters
item_csv_data = [["PROBLEM", "DIFFICULTY", "DISCRIMINATION"]]
item_csv_data += [
[problem, f"{value['beta']:.3f}", f"{value['alpha']:.3f}"]
for problem, value in item_param.items()
]
write_csv(os.path.join(output_dir, f"{output_name}_irt_itemparams.csv"), item_csv_data)
write_csv(
os.path.join(output_dir, f"{output_name}_irt_itemparams.csv"), item_csv_data
)

# Save sorted item parameters
sorted_item_csv_data = [item_csv_data[0]] + sorted(
item_csv_data[1:], key=lambda x: float(x[2])
)
write_csv(os.path.join(output_dir, f"{output_name}_sorted_irt_itemparams.csv"), sorted_item_csv_data)
write_csv(
os.path.join(output_dir, f"{output_name}_sorted_irt_itemparams.csv"),
sorted_item_csv_data,
)


def lang_irt():
    """
    Compute IRT parameters for models in both Hinglish and English, evaluated independently.

    For each language, responses are collected from the sanitized-code directory,
    IRT item/user parameters are fitted, and the results are written to CSV files
    (via ``save_irt_results``).
    """
    for lang, irt_matrix in [("English", irt_english), ("Hinglish", irt_hinglish)]:
        # Sanitized completions live under samples/<lang>/sanitized relative to
        # this module's parent directory.
        sanitized_code_dir = os.path.join(
            current_dir, "..", "samples", lang, "sanitized"
        )
        data = process_irt_data(lang, irt_matrix, sanitized_code_dir)
        item_param, user_param = compute_irt_params(data)
        save_irt_results(lang, item_param, user_param)


def combined_irt():
    """
    Compute IRT parameters for models in both Hinglish and English, evaluated jointly.

    Responses from both languages are pooled into a single dataset before fitting,
    and the combined results are saved to CSV files under the name "Combined".
    """
    data = []
    for lang, irt_matrix in [("English", irt_english), ("Hinglish", irt_hinglish)]:
        # Same layout as lang_irt: samples/<lang>/sanitized relative to this module.
        sanitized_code_dir = os.path.join(
            current_dir, "..", "samples", lang, "sanitized"
        )
        data.extend(process_irt_data(lang, irt_matrix, sanitized_code_dir))

    item_param, user_param = compute_irt_params(data)
    save_irt_results("Combined", item_param, user_param)


if __name__ == "__main__":
"""
Run the desired evaluation scripts.
Expand Down
13 changes: 7 additions & 6 deletions samples/codemodels/gemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@

# An access token from HUGGINGFACE is necessary to generate the code completions.
HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
login(token=HUGGING_FACE_TOKEN)
login(token=HUGGING_FACE_TOKEN)

def prompt_output(prompt, words):
    """
    Generate a code completion for ``prompt`` using the module-level Gemma model.

    :param prompt: str : The prompt to feed to the model.
    :param words: int : Maximum number of new tokens to generate.
    :return: str : The decoded completion, stripped of the leading ``<bos>``
        marker and truncated at ``<eos>`` if present.
    """
    input_ids = tokenizer(prompt, return_tensors="pt")

    outputs = model.generate(**input_ids, max_new_tokens=words, do_sample=True)
    code_gen = tokenizer.decode(outputs[0])
    # Strip the special tokens: everything after "<bos>" (5 chars), up to
    # "<eos>" when the model emitted one.
    ind = code_gen.find("<eos>")
    if ind == -1:
        return code_gen[code_gen.index("<bos>") + 5:]
    return code_gen[code_gen.index("<bos>") + 5:ind]


if __name__ == "__main__":
Expand All @@ -39,7 +39,8 @@ def prompt_output(prompt, words):
data = json.load(f)
for pid in range(1, len(data)): # Starting from 1 as per original code
prompt = data[pid]["prompt"]
with open(os.path.join(gemma_codes_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(gemma_codes_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(prompt_output(prompt, 512))
print(f"done for {pid}")

6 changes: 5 additions & 1 deletion samples/codemodels/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


def gpt(docstr):
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo", # Change to "gpt-4" if necessary. Check out more models at https://platform.openai.com/docs/models
Expand All @@ -29,6 +30,7 @@ def gpt(docstr):
)
return response.choices[0].message.content


if __name__ == "__main__":
base_dir = os.path.dirname(__file__)
path_humaneval = os.path.join(base_dir, "HinglishEval.json")
Expand All @@ -38,7 +40,9 @@ def gpt(docstr):
for pid in [10, 32, 38, 50]:
prompt = data[pid]["prompt"]
with open(
os.path.join(base_dir, "gpt_3.5_turbo_codes", f"{str(pid).zfill(3)}.py"),
os.path.join(
base_dir, "gpt_3.5_turbo_codes", f"{str(pid).zfill(3)}.py"
),
"w",
) as file:
file.write(gpt(prompt))
Expand Down
18 changes: 12 additions & 6 deletions samples/codemodels/llama3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")


def llama3(docstr):
# Create an OpenAI client with your deepinfra token and endpoint
openai = OpenAI(
Expand All @@ -16,7 +17,7 @@ def llama3(docstr):
)

chat_completion = openai.chat.completions.create(
model="meta-llama/Meta-Llama-3-70B-Instruct", ## MODEL NAME
model="meta-llama/Meta-Llama-3-70B-Instruct", # MODEL NAME
messages=[
{
"role": "system",
Expand All @@ -42,25 +43,30 @@ def llama3(docstr):
path_humaneval_hinglish = os.path.join(base_dir, "HinglishEval.json")
path_humaneval_english = os.path.join(base_dir, "HumanEval.json")

unsanitized_hinglish_dir = os.path.join(base_dir,"Hinglish", "unsanitized", "llama3")
unsanitized_hinglish_dir = os.path.join(
base_dir, "Hinglish", "unsanitized", "llama3"
)
os.makedirs(unsanitized_hinglish_dir, exist_ok=True)

with open(path_humaneval_hinglish) as f:
data = json.load(f)
for pid in range(59, 164): # Adjusted range for Hinglish
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(llama3(prompt))
print(f"done for {pid}")

unsanitized_english_dir = os.path.join(base_dir, "English", "unsanitized", "llama3")
unsanitized_english_dir = os.path.join(base_dir, "English", "unsanitized", "llama3")
os.makedirs(unsanitized_english_dir, exist_ok=True)

with open(path_humaneval_english) as f:
data = json.load(f)
for pid in range(164): # Adjusted range for English
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(llama3(prompt))
print(f"done for {pid}")

19 changes: 14 additions & 5 deletions samples/codemodels/mistral7B.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")


def mistral7b(docstr):
# Create an OpenAI client with your deepinfra token and endpoint
openai = OpenAI(
Expand All @@ -16,7 +17,7 @@ def mistral7b(docstr):
)

chat_completion = openai.chat.completions.create(
model="mistralai/Mistral-7B-Instruct-v0.3", ## MODEL NAME
model="mistralai/Mistral-7B-Instruct-v0.3", # MODEL NAME
messages=[
{
"role": "system",
Expand All @@ -40,25 +41,33 @@ def mistral7b(docstr):
if __name__ == "__main__":
base_dir = os.path.dirname(__file__)
path_humaneval_hinglish = os.path.join(base_dir, "HinglishEval.json")
unsanitized_hinglish_dir = os.path.join(base_dir, "Hinglish", "unsanitized", "mistral7b")
unsanitized_hinglish_dir = os.path.join(
base_dir, "Hinglish", "unsanitized", "mistral7b"
)
os.makedirs(unsanitized_hinglish_dir, exist_ok=True)

with open(path_humaneval_hinglish) as f:
data = json.load(f)
for pid in range(164):
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(mistral7b(prompt))
print(f"done for {pid}")

path_humaneval_english = os.path.join(base_dir, "HumanEval.json")
unsanitized_english_dir = os.path.join(base_dir, "English", "unsanitized", "mistral7b")
unsanitized_english_dir = os.path.join(
base_dir, "English", "unsanitized", "mistral7b"
)
os.makedirs(unsanitized_english_dir, exist_ok=True)

with open(path_humaneval_english) as f:
data = json.load(f)
for pid in range(164):
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(mistral7b(prompt))
print(f"done for {pid}")
19 changes: 14 additions & 5 deletions samples/codemodels/phi-3-med.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")


def phigen(docstr):
# Create an OpenAI client with your deepinfra token and endpoint
openai = OpenAI(
Expand All @@ -16,7 +17,7 @@ def phigen(docstr):
)

chat_completion = openai.chat.completions.create(
model="microsoft/Phi-3-medium-4k-instruct", ## MODEL NAME
model="microsoft/Phi-3-medium-4k-instruct", # MODEL NAME
messages=[
{
"role": "system",
Expand All @@ -40,25 +41,33 @@ def phigen(docstr):
if __name__ == "__main__":
base_dir = os.path.dirname(__file__)
path_humaneval_hinglish = os.path.join(base_dir, "HinglishEval.json")
unsanitized_hinglish_dir = os.path.join(base_dir,"Hinglish","unsanitized", "phi-3-gen")
unsanitized_hinglish_dir = os.path.join(
base_dir, "Hinglish", "unsanitized", "phi-3-gen"
)
os.makedirs(unsanitized_hinglish_dir, exist_ok=True)

with open(path_humaneval_hinglish) as f:
data = json.load(f)
for pid in range(164):
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_hinglish_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(phigen(prompt))
print(f"done for {pid}")

path_humaneval_english = os.path.join(base_dir, "HumanEval.json")
unsanitized_english_dir = os.path.join(base_dir, "English", "unsanitized", "phi-3-gen")
unsanitized_english_dir = os.path.join(
base_dir, "English", "unsanitized", "phi-3-gen"
)
os.makedirs(unsanitized_english_dir, exist_ok=True)

with open(path_humaneval_english) as f:
data = json.load(f)
for pid in range(164):
prompt = data[pid]["prompt"]
with open(os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w") as file:
with open(
os.path.join(unsanitized_english_dir, f"{str(pid).zfill(3)}.py"), "w"
) as file:
file.write(phigen(prompt))
print(f"done for {pid}")
10 changes: 4 additions & 6 deletions samples/codemodels/polycoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def prompt_output(prompt, words):


def sanitize(completion, prompt):
generated = completion[len(prompt) :]
generated = completion[len(prompt):]
lines = generated.split("\n")
final_lines = []
for line in lines:
Expand All @@ -28,7 +28,6 @@ def sanitize(completion, prompt):


if __name__ == "__main__":

base_dir = os.path.dirname(__file__)
path_humaneval = os.path.join(base_dir, "HinglishEval.json")

Expand All @@ -37,7 +36,7 @@ def sanitize(completion, prompt):

with open(path_humaneval) as f:
data = json.load(f)
for pid in range(164): # 10,32,38, 50 are special
for pid in range(164): # 10,32,38, 50 are special
prompt = data[pid]["prompt"]
with open(
f"/path/to/polycoder_0.4B_codes/{str(pid).zfill(3)}.py", "w"
Expand All @@ -51,7 +50,7 @@ def sanitize(completion, prompt):

with open(path_humaneval) as f:
data = json.load(f)
for pid in range(164): # 10,32,38, 50 are special
for pid in range(164): # 10,32,38, 50 are special
prompt = data[pid]["prompt"]
with open(
f"/path/to/polycoder_2.7B_codes/{str(pid).zfill(3)}.py", "w"
Expand All @@ -64,12 +63,11 @@ def sanitize(completion, prompt):

with open(path_humaneval) as f:
data = json.load(f)
for pid in range(164): # 10,32,38, 50 are special
for pid in range(164): # 10,32,38, 50 are special
prompt = data[pid]["prompt"]
with open(
f"/path/to/polycoder_160M_codes/{str(pid).zfill(3)}.py", "w"
) as file:
completion = prompt_output(prompt, 512)
file.write(sanitize(completion, prompt))
print(f"done for {pid}")

Loading

0 comments on commit f733d63

Please sign in to comment.