
Commit

Switched to compressed-tensors instead of sparseml
robertgshaw2-neuralmagic committed Jul 2, 2024
1 parent ceaf019 commit 655389d
Showing 3 changed files with 7 additions and 9 deletions.
6 changes: 2 additions & 4 deletions requirements-test.txt
@@ -18,11 +18,9 @@ peft
 requests==2.31 # required for python 3.8 testing
 ray
 sentence-transformers # required for embedding
-optimum # required for hf gptq baselines
-auto-gptq # required for hf gptq baselines
 torchvision # required for the image processor of phi3v
-sparseml==1.8.0 # required for compressed-tensors
-compressed-tensors==0.4.0 # required for compressed-tensors
+git+https://github.com/vllm-project/llm-compressor.git # required for compressed-tensors
+compressed-tensors==0.4.0
 
 # Benchmarking
 aiohttp
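The two additions above swap the sparseml pin for llm-compressor installed straight from GitHub, alongside a pinned compressed-tensors. A minimal sanity check of the resulting environment, using only the standard library (a sketch; it assumes the requirements above have been installed, and that the git package's distribution name is llmcompressor):

from importlib.metadata import version

# Distribution names as they appear in requirements-test.txt.
print(version("compressed-tensors"))  # expected 0.4.0, per the pin above
print(version("llmcompressor"))       # installed from the git URL above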
6 changes: 3 additions & 3 deletions tests/conftest.py
@@ -185,7 +185,7 @@ def __init__(
         model_kwargs: Optional[Dict[str, Any]] = None,
         is_embedding_model: bool = False,
         is_vision_model: bool = False,
-        is_sparseml_model: bool = False,
+        is_compressed_tensors_model: bool = False,
         **kwargs,
     ) -> None:
         assert dtype in _STR_DTYPE_TO_TORCH_DTYPE
@@ -204,8 +204,8 @@ def __init__(
         else:
             if is_vision_model:
                 auto_cls = AutoModelForVision2Seq
-            elif is_sparseml_model:
-                from sparseml.transformers import SparseAutoModelForCausalLM
+            elif is_compressed_tensors_model:
+                from llmcompressor.transformers import SparseAutoModelForCausalLM
                 auto_cls = SparseAutoModelForCausalLM
             else:
                 auto_cls = AutoModelForCausalLM
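For reference, a minimal sketch of the loading path this hunk enables when is_compressed_tensors_model=True. The checkpoint id below is a hypothetical placeholder, and the class is assumed to mirror the Hugging Face from_pretrained interface, as its drop-in use in HfRunner suggests:

from llmcompressor.transformers import SparseAutoModelForCausalLM

# Chosen in place of AutoModelForCausalLM for compressed-tensors models.
auto_cls = SparseAutoModelForCausalLM
model = auto_cls.from_pretrained("nm-testing/example-compressed-model")  # hypothetical id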
4 changes: 2 additions & 2 deletions tests/models/test_compressed_tensors.py
@@ -36,9 +36,9 @@ def test_models(
 ) -> None:
     # Run sparseml.
     with hf_runner(model_name=model_name,
-                   is_sparseml_model=True) as sparseml_model:
+                   is_compressed_tensors_model=True) as compressed_tensors_models:
 
-        sparseml_outputs = sparseml_model.generate_greedy_logprobs_limit(
+        sparseml_outputs = compressed_tensors_models.generate_greedy_logprobs_limit(
             example_prompts, MAX_TOKENS, NUM_LOGPROBS)
 
     # Run vllm.
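The hunk ends before the vllm half of the comparison. A sketch of how that half conventionally looks in these model tests (vllm_runner and check_logprobs_close are assumed from the shared test fixtures and utilities, which are not shown in this diff):

with vllm_runner(model_name) as vllm_model:
    vllm_outputs = vllm_model.generate_greedy_logprobs(
        example_prompts, MAX_TOKENS, NUM_LOGPROBS)

# Compare greedy logprobs between the HF baseline and vllm.
check_logprobs_close(
    outputs_0_lst=sparseml_outputs,
    outputs_1_lst=vllm_outputs,
    name_0="hf",
    name_1="vllm",
)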
