diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
index 12aa3179e9..2a2e7f3a50 100644
--- a/litgpt/eval/evaluate.py
+++ b/litgpt/eval/evaluate.py
@@ -3,7 +3,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 
 import yaml
 import torch
@@ -11,16 +11,6 @@
 from litgpt.utils import CLI, copy_config_files
 
 
-def save_safetensors(out_dir, repo_id):
-    from transformers import AutoModel
-
-    state_dict = torch.load(out_dir/"model.pth")
-    model = AutoModel.from_pretrained(
-        repo_id, state_dict=state_dict
-    )
-    model.save_pretrained(out_dir)
-
-
 def prepare_results(results, save_filepath, print_results=True):
     from lm_eval.utils import make_table
 
@@ -43,6 +33,7 @@ def convert_and_evaluate(
     num_fewshot: Optional[int] = None,
     batch_size: int = 1,
     device: Optional[str] = None,
+    dtype: Optional[Union[str, torch.dtype]] = None,
     limit: Optional[float] = None,
     seed: int = 1234,
     save_filepath: Optional[str] = None,
@@ -102,15 +93,15 @@ def convert_and_evaluate(
     if not model_path.exists() or force_conversion:
         convert_lit_checkpoint(checkpoint_dir=checkpoint_dir, output_dir=out_dir)
 
-    safetensors_path = out_dir / "model.safetensors"
-    if not safetensors_path.exists() or force_conversion:
-        save_safetensors(out_dir, repo_id)
+    from lm_eval.models.huggingface import HFLM
+
+    state_dict = torch.load(model_path)
+    model = HFLM(repo_id, state_dict=state_dict, device=device, batch_size=batch_size, dtype=dtype)
 
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
     results = evaluator.simple_evaluate(
-        model="hf",
-        model_args=f"pretrained={out_dir}",
+        model=model,
         tasks=tasks.split(","),
         num_fewshot=num_fewshot,
         batch_size=batch_size,
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 3ea6994532..023621db6a 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -1,6 +1,5 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 
-import os
 import shutil
 import subprocess
 import sys
@@ -25,7 +24,6 @@
     strict=False,
     match="Loading a dataset cached in a LocalFileSystem is not supported",
 )
-@mock.patch.dict(os.environ, {"LT_ACCELERATOR": "cpu"})
 def test_evaluate_script(tmp_path, monkeypatch):
     ours_config = Config.from_name("pythia-14m")
     download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path)
@@ -42,6 +40,7 @@ def test_evaluate_script(tmp_path, monkeypatch):
        checkpoint_dir=tmp_path,
        out_dir=tmp_path / "out_dir",
        device="cpu",
+       dtype=torch.float32,
        limit=5,
        tasks="mathqa"
    )
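For context: after this change, the new `dtype` argument flows from `convert_and_evaluate` into lm-eval's `HFLM` wrapper, replacing the previous safetensors re-export plus `model_args=f"pretrained={out_dir}"` round trip. Below is a minimal usage sketch mirroring the updated test; the checkpoint path is a placeholder (any checkpoint directory prepared by litgpt should work), not something taken from this diff.

    from pathlib import Path

    import torch

    from litgpt.eval.evaluate import convert_and_evaluate

    # Placeholder path: assumes a LitGPT checkpoint was downloaded here beforehand.
    checkpoint_dir = Path("checkpoints/EleutherAI/pythia-14m")

    convert_and_evaluate(
        checkpoint_dir=checkpoint_dir,
        out_dir=checkpoint_dir / "out_dir",
        device="cpu",
        dtype=torch.float32,  # new argument introduced by this diff
        limit=5,
        tasks="mathqa",
    )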