Skip to content

Commit

Permalink
added multiple models, along the lines of multiple languages. (#40)
Browse files Browse the repository at this point in the history
* Added multiple models, along the lines of multiple languages. Several functions are now called twice, and their output goes into the `multilingual` and `multimodel` subdirectories within the output and raw_results dirs. Two hardcoded model names in ontogpt have to be edited in order for this to work.

* Deleted comments

* Polished and finished the multimodel plots etc.
  • Loading branch information
leokim-l authored Aug 2, 2024
1 parent b23dd69 commit d90eec6
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 45 deletions.
16 changes: 8 additions & 8 deletions src/malco/post_process/compute_mrr.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ def mondo_adapter() -> OboGraphInterface:
"""
return get_adapter("sqlite:obo:mondo")

def compute_mrr(output_dir, prompt_dir, correct_answer_file,
def compute_mrr(comparing, output_dir, prompt_dir, correct_answer_file,
raw_results_dir) -> Path:
# Read in results TSVs from self.output_dir that match glob results*tsv
results_data = []
results_files = []
num_ppkt = 0

for subdir, dirs, files in os.walk(output_dir):
for subdir, dirs, files in os.walk(output_dir): # maybe change this so it only looks into multilingual/multimodel? I.e. use that as outputdir...?
for filename in files:
if filename.startswith("result") and filename.endswith(".tsv"):
file_path = os.path.join(subdir, filename)
Expand All @@ -49,7 +49,7 @@ def compute_mrr(output_dir, prompt_dir, correct_answer_file,
label_to_correct_term = answers.set_index("label")["term"].to_dict()
# Calculate the Mean Reciprocal Rank (MRR) for each file
mrr_scores = []
header = ["lang", "n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8", "n9", "n10", "n10p", "nf"]
header = [comparing, "n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8", "n9", "n10", "n10p", "nf"]
rank_df = pd.DataFrame(0, index=np.arange(len(results_files)), columns=header)

cache_file = output_dir / "cache_log.txt"
Expand Down Expand Up @@ -85,15 +85,15 @@ def compute_mrr(output_dir, prompt_dir, correct_answer_file,
)

# Save full data frame
full_df_file = raw_results_dir / results_files[i][0:2] / "full_df_results.tsv"
full_df_file = raw_results_dir / results_files[i].split("/")[0] / "full_df_results.tsv"
df.to_csv(full_df_file, sep='\t', index=False)

# Calculate MRR for this file
mrr = df.groupby("label")["reciprocal_rank"].max().mean()
mrr_scores.append(mrr)

# Calculate top<n> of each rank
rank_df.loc[i,"lang"] = results_files[i][0:2]
rank_df.loc[i, comparing] = results_files[i].split("/")[0]

ppkts = df.groupby("label")[["rank","is_correct"]]
index_matches = df.index[df['is_correct']]
Expand Down Expand Up @@ -133,12 +133,12 @@ def compute_mrr(output_dir, prompt_dir, correct_answer_file,

print("MRR scores are:\n")
print(mrr_scores)
plot_data_file = plot_dir / "plotting_data.tsv"
mrr_file = plot_dir / "mrr_result.tsv"

# write out results for plotting
with plot_data_file.open('w', newline = '') as dat:
with mrr_file.open('w', newline = '') as dat:
writer = csv.writer(dat, quoting = csv.QUOTE_NONNUMERIC, delimiter = '\t', lineterminator='\n')
writer.writerow(results_files)
writer.writerow(mrr_scores)

return plot_data_file, plot_dir, num_ppkt, topn_file
return mrr_file, plot_dir, num_ppkt, topn_file
31 changes: 19 additions & 12 deletions src/malco/post_process/generate_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@

# Make a nice plot, use it as function or as script

def make_plots(plot_data_file, plot_dir, languages, num_ppkt, topn_file):
with plot_data_file.open('r', newline = '') as f:
def make_plots(mrr_file, plot_dir, languages, num_ppkt, models, topn_file, comparing):
if comparing=="model":
name_string = str(len(models))
else:
name_string = str(len(languages))

with mrr_file.open('r', newline = '') as f:
lines = csv.reader(f, quoting = csv.QUOTE_NONNUMERIC, delimiter = '\t', lineterminator='\n')
results_files = next(lines)
mrr_scores = next(lines)
#lines = f.read().splitlines()

print(results_files)
print(mrr_scores)
Expand All @@ -21,8 +25,9 @@ def make_plots(plot_data_file, plot_dir, languages, num_ppkt, topn_file):
plt.xlabel("Results File")
plt.ylabel("Mean Reciprocal Rank (MRR)")
plt.title("MRR of Correct Answers Across Different Results Files")
plot_path = plot_dir / (str(len(languages)) + "_langs_" + str(num_ppkt) + "ppkt.png")
plot_path = plot_dir / (name_string + "_" + comparing + "_" + str(num_ppkt) + "ppkt.png")
plt.savefig(plot_path)
plt.close()

# Plotting bar-plots with top<n> ranks
df = pd.read_csv(topn_file, delimiter='\t')
Expand All @@ -33,17 +38,19 @@ def make_plots(plot_data_file, plot_dir, languages, num_ppkt, topn_file):
df["not_found"] = df["nf"]

df_aggr = pd.DataFrame()
df_aggr = pd.melt(df, id_vars="lang", value_vars=["top1", "top3", "top5", "top10", "not_found"], var_name="Rank_in", value_name="counts")
df_aggr = pd.melt(df, id_vars=comparing, value_vars=["top1", "top3", "top5", "top10", "not_found"], var_name="Rank_in", value_name="counts")
df_aggr["percentage"] = df_aggr["counts"]/num_ppkt
bar_data_file = plot_dir / "topn_aggr.tsv"
df_aggr.to_csv(bar_data_file, sep='\t', index=False)

sns.barplot(x="Rank_in", y="counts", data = df_aggr, hue = "lang")
sns.barplot(x="Rank_in", y="percentage", data = df_aggr, hue = comparing)

plt.xlabel("Number of Ranks")
plt.ylabel("Number of Correct Diagnoses")
plt.title("Rank Comparison for Different Languages")
plot_path = plot_dir / ("barplot_" + str(len(languages)) + "_langs_" + str(num_ppkt) + "ppkt.png")
plt.xlabel("Number of Ranks in")
plt.ylabel("Percentage of Cases")
plt.title("Rank Comparison for Differential Diagnosis")
breakpoint()
plt.legend(title=comparing)
plot_path = plot_dir / ("barplot_" + name_string + "_" + comparing + "_" + str(num_ppkt) + "ppkt.png")
plt.savefig(plot_path)
plt.show()

plt.close()

19 changes: 14 additions & 5 deletions src/malco/post_process/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os


def post_process(raw_results_dir: Path, output_dir: Path, langs: tuple) -> None:
def post_process(raw_results_dir: Path, output_dir: Path, langs: tuple, models: tuple) -> None:
"""
Post-process the raw results output to standardised PhEval TSV format.
Expand All @@ -14,10 +14,19 @@ def post_process(raw_results_dir: Path, output_dir: Path, langs: tuple) -> None:
"""

for lang in langs:
raw_results_lang = raw_results_dir / lang
output_lang = output_dir / lang
raw_results_lang.mkdir(exist_ok=True)
output_lang.mkdir(exist_ok=True)
raw_results_lang = raw_results_dir / "multilingual" / lang
output_lang = output_dir / "multilingual" / lang
raw_results_lang.mkdir(exist_ok=True, parents=True)
output_lang.mkdir(exist_ok=True, parents=True)

create_standardised_results(raw_results_dir=raw_results_lang,
output_dir=output_lang, output_file_name="results.tsv")

for model in models:
raw_results_model = raw_results_dir / "multimodel" / model
output_model = output_dir / "multimodel" / model
raw_results_model.mkdir(exist_ok=True, parents=True)
output_model.mkdir(exist_ok=True, parents=True)

create_standardised_results(raw_results_dir=raw_results_model,
output_dir=output_model, output_file_name="results.tsv")
37 changes: 28 additions & 9 deletions src/malco/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,36 @@
import subprocess


def call_ontogpt(lang, raw_results_dir, input_dir, model, modality):
    """
    Run one OntoGPT `run-multilingual-analysis` job and wait for it to finish.

    Args:
        lang: Language code; selects the prompt directory
            `{input_dir}/prompts/{lang}/`.
        raw_results_dir: Directory under which the raw OntoGPT output is written.
        input_dir: Directory that contains the `prompts` folder.
        model: Model name handed to OntoGPT through `--model`.
        modality: Either "several_languages" (results are stored under
            `{raw_results_dir}/{lang}`) or "several_models" (results are
            stored under `{raw_results_dir}/{model}`).

    Raises:
        ValueError: If `modality` is not one of the two supported values.
    """
    # The two modalities build the same command; they only differ in which
    # sub-directory of raw_results_dir receives the output.
    if modality == "several_languages":
        results_subdir = lang
    elif modality == "several_models":
        results_subdir = model
    else:
        # The former fallback tried to *call* the still-unbound `command`
        # name, which crashed with an unrelated UnboundLocalError. Fail with
        # an explicit, descriptive error instead.
        raise ValueError(
            f"Unknown modality {modality!r}: expected 'several_languages' "
            f"or 'several_models'"
        )

    command = (
        f"ontogpt -v run-multilingual-analysis "
        f"--output={raw_results_dir}/{results_subdir}/results.yaml "  # save raw OntoGPT output
        f"{input_dir}/prompts/{lang}/ "
        f"{raw_results_dir}/{results_subdir}/differentials_by_file/ "
        f"--model={model}"
    )
    print(f"Running command: {command}")
    process = subprocess.Popen(command, shell=True)
    # Block until the OntoGPT process has terminated.
    process.communicate()
    print(f"Finished command for language {lang} and model {model}")


def run(testdata_dir: Path,
raw_results_dir: Path,
input_dir: Path,
langs: tuple,
models: tuple,
max_workers: int = None) -> None:
"""
Run the tool to obtain the raw results.
Expand All @@ -35,5 +48,11 @@ def run(testdata_dir: Path,
if max_workers is None:
max_workers = multiprocessing.cpu_count()

modality = "several_languages"
with multiprocessing.Pool(processes=max_workers) as pool:
pool.starmap(call_ontogpt, [(lang, raw_results_dir, input_dir) for lang in langs])
pool.starmap(call_ontogpt, [(lang, raw_results_dir / "multilingual", input_dir, "gpt-4-turbo", modality) for lang in langs])

# English only many models
modality = "several_models"
with multiprocessing.Pool(processes=max_workers) as pool:
pool.starmap(call_ontogpt, [("en", raw_results_dir / "multimodel", input_dir, model, modality) for model in models])
31 changes: 20 additions & 11 deletions src/malco/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,13 @@ class MalcoRunner(PhEvalRunner):
version: str
# Declare a tuple (immutable!) of languages
languages = ("en", "es", "nl", "it", "de")
models = ('gpt-4o', 'gpt-4') # Decide on list of models: Claude-Sonnet (Anthropic key),

def prepare(self):
"""
Pre-process any data and inputs necessary to run the tool.

No pre-processing is performed here; the method only prints a progress
message.
"""
print("Preparing...\n")
# Before this prepare step:
# We start with a cohort with 1 phenopacket per disease and run
# phenopacket2prompt.jar to get the prompts.
# We then commit this to the repo, so the phenopackets and prompts here
# are the source of truth.
pass

def run(self):
Expand All @@ -41,7 +37,8 @@ def run(self):
run(testdata_dir=self.testdata_dir,
raw_results_dir=self.raw_results_dir,
input_dir=self.input_dir,
langs=self.languages)
langs=self.languages,
models=self.models)


def post_process(self,
Expand All @@ -56,13 +53,25 @@ def post_process(self,

post_process(raw_results_dir=self.raw_results_dir,
output_dir=self.output_dir,
langs=self.languages)
langs=self.languages,
models=self.models)

plot_data_file, plot_dir, num_ppkt, topn_file = compute_mrr(
output_dir=self.output_dir,
comparing = "language"
mrr_file, plot_dir, num_ppkt, topn_file = compute_mrr(comparing,
output_dir=self.output_dir / "multilingual" ,
prompt_dir=os.path.join(self.input_dir, prompts_subdir_name),
correct_answer_file=correct_answer_file,
raw_results_dir=self.raw_results_dir)
raw_results_dir=self.raw_results_dir / "multilingual")

if print_plot:
make_plots(plot_data_file, plot_dir, self.languages, num_ppkt, topn_file)
make_plots(mrr_file, plot_dir, self.languages, num_ppkt, self.models, topn_file, comparing)

comparing = "model"
mrr_file, plot_dir, num_ppkt, topn_file = compute_mrr( comparing,
output_dir=self.output_dir / "multimodel" ,
prompt_dir=os.path.join(self.input_dir, prompts_subdir_name),
correct_answer_file=correct_answer_file,
raw_results_dir=self.raw_results_dir / "multimodel" )

if print_plot:
make_plots(mrr_file, plot_dir, self.languages, num_ppkt, self.models, topn_file, comparing)

0 comments on commit d90eec6

Please sign in to comment.