Skip to content

Commit

Permalink
eval unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Mar 22, 2024
1 parent 4660507 commit 018cc89
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 20 deletions.
47 changes: 27 additions & 20 deletions litgpt/scripts/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@
from litgpt.utils import CLI, copy_config_files


def safe_safetensors(out_dir, repo_id):
    """Re-save a converted litgpt checkpoint in Hugging Face safetensors format.

    Loads the ``model.pth`` state dict that ``convert_lit_checkpoint`` wrote to
    ``out_dir``, instantiates the matching Hugging Face architecture for
    ``repo_id`` with those weights, and calls ``save_pretrained`` so the
    weights are written back to ``out_dir`` as ``.safetensors`` files.

    Args:
        out_dir: Directory containing ``model.pth``; also the save target.
        repo_id: Hugging Face Hub repo id identifying the model architecture
            (e.g. ``"EleutherAI/pythia-14m"``).
    """
    from transformers import AutoModel

    # The checkpoint is a plain tensor state dict produced by our own
    # conversion step; load tensors only so unpickling cannot execute
    # arbitrary code if the file was tampered with.
    state_dict = torch.load(out_dir / "model.pth", weights_only=True)
    model = AutoModel.from_pretrained(repo_id, state_dict=state_dict)
    # Saves .safetensors files
    model.save_pretrained(out_dir)


def prepare_results(results, save_filepath, print_results=True):
    """Optionally print evaluation tables and persist the raw results as JSON.

    Args:
        results: Result dict returned by ``lm_eval.evaluator.simple_evaluate``.
        save_filepath: ``pathlib.Path`` the JSON dump is written to.
        print_results: When True, pretty-print the main (and group) result
            tables to stdout.
    """
    if print_results:
        # Imported lazily so saving the JSON dump does not require the
        # table-rendering helper when nothing is printed.
        from lm_eval.utils import make_table

        print(make_table(results))
        if "groups" in results:
            print(make_table(results, "groups"))

    json_result = json.dumps(results, indent=2, ensure_ascii=False)
    # write_text closes and flushes the file deterministically; the previous
    # open(...).write(...) chain leaked the file handle.
    save_filepath.write_text(json_result, encoding="utf-8")


def convert_and_evaluate(
checkpoint_dir: Optional[str] = None,
out_dir: Optional[str] = None,
Expand Down Expand Up @@ -45,7 +69,6 @@ def convert_and_evaluate(
"""

from lm_eval import evaluator
from lm_eval.utils import make_table

if checkpoint_dir is None:
raise ValueError("Provide a checkpoint_dir argument.")
Expand All @@ -68,16 +91,7 @@ def convert_and_evaluate(

if not skip_conversion:
convert_lit_checkpoint(checkpoint_dir=checkpoint_dir, output_dir=out_dir)

from transformers import AutoModel

state_dict = torch.load(out_dir/"model.pth")
model = AutoModel.from_pretrained(
repo_id, state_dict=state_dict
)

# Saves .safetensors files
model.save_pretrained(out_dir)
safe_safetensors(out_dir, repo_id)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

Expand All @@ -94,15 +108,8 @@ def convert_and_evaluate(
torch_random_seed=seed,
)

print(make_table(results))
if "groups" in results:
print(make_table(results, "groups"))

json_result = json.dumps(
results, indent=2, ensure_ascii=False
)

save_filepath.open("w", encoding="utf-8").write(json_result)
print("results", results)
prepare_results(results, save_filepath)


if __name__ == "__main__":
Expand Down
61 changes: 61 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.

import sys
from pathlib import Path

import datasets
import pytest

from litgpt.scripts.download import download_from_hub
from litgpt.scripts.evaluate import safe_safetensors, prepare_results
from litgpt.scripts.convert_lit_checkpoint import convert_lit_checkpoint
from lm_eval import evaluator

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))


@pytest.mark.xfail(
    raises=(datasets.builder.DatasetGenerationError, NotImplementedError),
    strict=False,
    match="Loading a dataset cached in a LocalFileSystem is not supported",
)
def test_run_eval(tmp_path, float_like):
    """End-to-end smoke test: download pythia-14m, convert it to HF
    safetensors, run lm_eval on two tasks, and check the saved metrics."""
    repo_id = "EleutherAI/pythia-14m"
    download_from_hub(repo_id=repo_id, checkpoint_dir=tmp_path)

    checkpoint_path = Path(tmp_path) / Path(repo_id)

    convert_lit_checkpoint(checkpoint_dir=checkpoint_path, output_dir=checkpoint_path)
    safe_safetensors(out_dir=checkpoint_path, repo_id=repo_id)

    eval_tasks = "coqa,hellaswag"
    # limit=2 keeps the run fast; the expected metric values below are
    # specific to this limit.
    results = evaluator.simple_evaluate(
        model="hf",
        model_args=f"pretrained={checkpoint_path}",
        tasks=eval_tasks.split(","),
        limit=2,
    )

    save_path = checkpoint_path / "results.json"
    prepare_results(results, save_path, print_results=False)

    assert save_path.is_file()
    assert results["results"] == {
        'coqa': {
            'alias': 'coqa',
            'em,none': 0.0,
            'em_stderr,none': 0.0,
            'f1,none': 0.0,
            'f1_stderr,none': 0.0
        },
        'hellaswag': {
            'acc,none': 0.0,
            'acc_stderr,none': 0.0,
            'acc_norm,none': 0.5,
            'acc_norm_stderr,none': 0.5,
            'alias': 'hellaswag'
        }
    }

0 comments on commit 018cc89

Please sign in to comment.