update

Lightning-AI · Apr 25, 2024 · 7c425b9 · 7c425b9
1 parent c816ab0
commit 7c425b9
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 4 deletions.
diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
@@ -27,7 +27,7 @@ def prepare_results(results, save_filepath, print_results=True):
 def convert_and_evaluate(
     checkpoint_dir: Path,
     tasks: Optional[str] = None,
-    out_dir: Optional[str] = None,
+    out_dir: Optional[Path] = None,
     force_conversion: bool = False,
     num_fewshot: Optional[int] = None,
     batch_size: int = 1,
@@ -45,9 +45,7 @@ def convert_and_evaluate(
             Saves to `checkpoint_dir`/evaluate by default.
         force_conversion: Set to `True` to reconvert the model and override
             an existing model.pth from a previous evaluation call.
-        tasks: CSV of task names to evaluate.
-           By default, the following tasks are used:
-           "hellaswag,truthfulqa_mc2,mmlu"
+        tasks: CSV of task names to evaluate. Example: "hellaswag,truthfulqa_mc2,mmlu"
         num_fewshot: Number of examples in few-shot context.
         batch_size: Batch size configuration.
         device: Device to use for evaluation, for example, "cuda" or "cuda:0".

diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
@@ -40,6 +40,7 @@ def test_evaluate_script(tmp_path):
     assert (tmp_path / "out_dir" / "results.json").is_file()
     assert "mathqa" in stdout
     assert "Metric" in stdout
+    assert "Loading checkpoint shards" not in stdout
 
 
 @pytest.mark.parametrize("mode", ["file", "entrypoint"])