From 655389d9200fa09385351dc1043d33d910cd3be1 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Tue, 2 Jul 2024 19:17:48 +0000 Subject: [PATCH] switched to compressed tensors instead of sparseml --- requirements-test.txt | 6 ++---- tests/conftest.py | 6 +++--- tests/models/test_compressed_tensors.py | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index bf06c470ee405..9e3a50ad0653c 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -18,11 +18,9 @@ peft requests==2.31 # required for python 3.8 testing ray sentence-transformers # required for embedding -optimum # required for hf gptq baselines -auto-gptq # required for hf gptq baselines torchvision # required for the image processor of phi3v -sparseml==1.8.0 # required for compressed-tensors -compressed-tensors==0.4.0 # required for compressed-tensors +git+https://github.com/vllm-project/llm-compressor.git # required for compressed-tensors +compressed-tensors==0.4.0 # Benchmarking aiohttp diff --git a/tests/conftest.py b/tests/conftest.py index 5891804d00880..00bc24a1ab09d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -185,7 +185,7 @@ def __init__( model_kwargs: Optional[Dict[str, Any]] = None, is_embedding_model: bool = False, is_vision_model: bool = False, - is_sparseml_model: bool = False, + is_compressed_tensors_model: bool = False, **kwargs, ) -> None: assert dtype in _STR_DTYPE_TO_TORCH_DTYPE @@ -204,8 +204,8 @@ def __init__( else: if is_vision_model: auto_cls = AutoModelForVision2Seq - elif is_sparseml_model: - from sparseml.transformers import SparseAutoModelForCausalLM + elif is_compressed_tensors_model: + from llmcompressor.transformers import SparseAutoModelForCausalLM auto_cls = SparseAutoModelForCausalLM else: auto_cls = AutoModelForCausalLM diff --git a/tests/models/test_compressed_tensors.py b/tests/models/test_compressed_tensors.py index baaab38e5e008..a5158ee8cb9c9 100644 --- 
a/tests/models/test_compressed_tensors.py +++ b/tests/models/test_compressed_tensors.py @@ -36,9 +36,9 @@ def test_models( ) -> None: # Run sparseml. with hf_runner(model_name=model_name, - is_sparseml_model=True) as sparseml_model: + is_compressed_tensors_model=True) as compressed_tensors_models: - sparseml_outputs = sparseml_model.generate_greedy_logprobs_limit( + sparseml_outputs = compressed_tensors_models.generate_greedy_logprobs_limit( example_prompts, MAX_TOKENS, NUM_LOGPROBS) # Run vllm.