[CI/Build] Remove sparseml requirement from testing (#7037)

fb3db616 · Michael Goin · GitHub · 2dd34371 · fb3db616 · fb3db616
Unverified commit fb3db616, authored Aug 01, 2024 by Michael Goin; committed by GitHub on Aug 01, 2024.
4 changed files
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -14,7 +14,6 @@ peft
 requests
 ray
 sentence-transformers # required for embedding
-sparseml==1.8.0 # required for compressed-tensors
 compressed-tensors==0.4.0 # required for compressed-tensors
 timm # required for internvl test

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -152,7 +152,6 @@ class HfRunner:
        model_kwargs: Optional[Dict[str, Any]] = None,
        is_embedding_model: bool = False,
        is_vision_model: bool = False,
-        is_sparseml_model: bool = False,
    ) -> None:
        torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
@@ -169,9 +168,6 @@ class HfRunner:
        else:
            if is_vision_model:
                auto_cls = AutoModelForVision2Seq
-            elif is_sparseml_model:
-                from sparseml.transformers import SparseAutoModelForCausalLM
-                auto_cls = SparseAutoModelForCausalLM
            else:
                auto_cls = AutoModelForCausalLM

--- a/tests/models/test_compressed_tensors.py
+++ b/tests/models/test_compressed_tensors.py
-"""Compares vllm vs sparseml for compressed-tensors
-Note: vllm and sparseml do not have bitwise correctness, 
-so in this test, we just confirm that the top selected 
-tokens of the are in the top 5 selections of each other.
-"""
-import pytest
-from tests.quantization.utils import is_quant_method_supported
-from .utils import check_logprobs_close
-MODELS = [
-    # No bias
-    "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test",
-    # Bias
-    "neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8"
-]
-MAX_TOKENS = 32
-NUM_LOGPROBS = 5
-@pytest.mark.skipif(
-    not is_quant_method_supported("compressed-tensors"),
-    reason="compressed-tensors is not supported on this machine type.")
-@pytest.mark.parametrize("model_name", MODELS)
-def test_models(
-    vllm_runner,
-    hf_runner,
-    example_prompts,
-    model_name,
-) -> None:
-    # Run sparseml.
-    with hf_runner(model_name=model_name,
-                   is_sparseml_model=True) as sparseml_model:
-        sparseml_outputs = sparseml_model.generate_greedy_logprobs_limit(
-            example_prompts, MAX_TOKENS, NUM_LOGPROBS)
-    # Run vllm.
-    with vllm_runner(model_name=model_name) as vllm_model:
-        vllm_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, MAX_TOKENS, NUM_LOGPROBS)
-    check_logprobs_close(
-        outputs_0_lst=sparseml_outputs,
-        outputs_1_lst=vllm_outputs,
-        name_0="sparseml",
-        name_1="vllm",
-    )
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
-"""Test model set-up and weight loading for sparseml-quantized models.
+"""Test model set-up and weight loading for llmcompressor-quantized models.
 Run `pytest tests/quantization/test_compressed_tensors.py`.
 """