[MISC] Remove the "not fully optimized" warning for FP8 quantization (vllm-project#5472)

Co-authored-by: Philipp Moritz <[email protected]>
opendatahub-io · Jun 13, 2024 · 30299a4
1 parent 85657b5
commit 30299a4
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/vllm/config.py b/vllm/config.py
@@ -212,7 +212,7 @@ def _verify_quantization(self) -> None:
                     f"{self.quantization} quantization is currently not "
                     f"supported in ROCm.")
             if (self.quantization
-                    not in ["marlin", "gptq_marlin_24", "gptq_marlin"]):
+                    not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin")):
                 logger.warning(
                     "%s quantization is not fully "
                     "optimized yet. The speed can be slower than "