From 65cc8d05540378d77f828ff7a66214d3216a9380 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Wed, 24 Apr 2024 10:03:51 -0500
Subject: [PATCH] Use new litserve accelerator="auto" setting (#1336)

---
 litgpt/deploy/serve.py | 7 ++++---
 pyproject.toml         | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/litgpt/deploy/serve.py b/litgpt/deploy/serve.py
index 9df48ad98d..4a26e0b14f 100644
--- a/litgpt/deploy/serve.py
+++ b/litgpt/deploy/serve.py
@@ -40,7 +40,7 @@ def setup(self, device: str) -> None:
 
         fabric = L.Fabric(
             accelerator=device.type,
-            devices=1 if device.type=="cpu" else [device.index],  # TODO: Update once LitServe supports "auto"
+            devices=1 if device.type=="cpu" else [device.index],
             precision=precision,
         )
         checkpoint_path = self.checkpoint_dir / "lit_model.pth"
@@ -99,7 +99,7 @@ def run_server(
     top_k: int = 200,
     max_new_tokens: int = 50,
     devices: int = 1,
-    accelerator: str = "cuda",
+    accelerator: str = "auto",
     port: int = 8000
 ) -> None:
     """Serve a LitGPT model using LitServe
@@ -114,7 +114,8 @@ def run_server(
         generated text but can also lead to more incoherent texts.
         max_new_tokens: The number of generation steps to take.
         devices: How many devices/GPUs to use.
-        accelerator: The type of accelerator to use. For example, "cuda" or "cpu".
+        accelerator: The type of accelerator to use. For example, "auto", "cuda", "cpu", or "mps".
+            The "auto" setting (default) chooses a GPU if available, and otherwise uses a CPU.
         port: The network port number on which the model is configured to be served.
     """
     check_valid_checkpoint_dir(checkpoint_dir, model_filename="lit_model.pth")
diff --git a/pyproject.toml b/pyproject.toml
index b6fbec18b8..1343429619 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
     "torch>=2.2.0",
     "lightning==2.3.0.dev20240328",
     "jsonargparse[signatures]>=4.27.6",
-    "litserve==0.0.0.dev2",  # imported by litgpt.deploy
+    "litserve>=0.1.0"  # imported by litgpt.deploy
 ]
 
 [project.urls]
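
---

A minimal usage sketch of the patched entry point (the checkpoint directory below is a placeholder, and the call assumes only the run_server parameters visible in the hunks above):

    from pathlib import Path

    from litgpt.deploy.serve import run_server

    # With this patch, accelerator defaults to "auto": a GPU is selected if one
    # is available, and the server otherwise falls back to the CPU, so the
    # accelerator argument can normally be omitted.
    run_server(
        checkpoint_dir=Path("checkpoints/some-model"),  # placeholder path
        accelerator="auto",
        devices=1,
        port=8000,
    )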