Unverified Commit 658b849a authored by Younes Belkada, committed by GitHub

Quantization / TST: Fix remaining quantization tests (#31000)

* Fix remaining quant tests

* Update test_quanto.py
parent fd3c1280
@@ -45,9 +45,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt
 # For video model testing
 RUN python3 -m pip install --no-cache-dir decord av==9.2.0
-
-# For GGUF tests
-RUN python3 -m pip install --no-cache-dir gguf
 # Some slow tests require bnb
 RUN python3 -m pip install --no-cache-dir bitsandbytes
@@ -48,6 +48,9 @@ RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
 # Add hqq for quantization testing
 RUN python3 -m pip install --no-cache-dir hqq
+
+# For GGUF tests
+RUN python3 -m pip install --no-cache-dir gguf
 # Add autoawq for quantization testing
 # >=v0.2.3 needed for compatibility with torch 2.2.1
 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl
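The `gguf` dependency is moved into the quantization test image above. As a point of reference, below is a minimal sketch of the GGUF loading path that dependency enables in transformers; the repo id and filename are illustrative placeholders, not values taken from this diff.

```python
# Sketch only: placeholder repo id and file name, not from this PR.
from transformers import AutoModelForCausalLM, AutoTokenizer

gguf_repo = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"   # placeholder repo id
gguf_filename = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"  # placeholder file name

# Passing `gguf_file` tells transformers to load and dequantize the GGUF checkpoint.
tokenizer = AutoTokenizer.from_pretrained(gguf_repo, gguf_file=gguf_filename)
model = AutoModelForCausalLM.from_pretrained(gguf_repo, gguf_file=gguf_filename)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
```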
@@ -440,6 +440,7 @@ class QuantoQuantizationActivationTest(unittest.TestCase):
         self.assertIn("We don't support quantizing the activations with transformers library", str(e.exception))


+@require_quanto
 @require_torch_gpu
 class QuantoKVCacheQuantizationTest(unittest.TestCase):
     @slow
@@ -447,7 +448,7 @@ class QuantoKVCacheQuantizationTest(unittest.TestCase):
     def test_quantized_cache(self):
         EXPECTED_TEXT_COMPLETION = [
             "Simply put, the theory of relativity states that 1) the speed of light is the same for all observers, and 2) the laws of physics are the same for all observers.\nThe first part of the theory of relativity",
-            "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my chicken, my pizza, my sal",
+            "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my salads, my chicken, my fish",
         ]
         prompts = [
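For context, here is a minimal sketch of the quantized-KV-cache generation path that `QuantoKVCacheQuantizationTest` exercises and that the new `@require_quanto` decorator guards. The checkpoint id, prompt, and cache settings are assumptions for illustration, not copied from the test, and optimum-quanto must be installed for the "quanto" cache backend.

```python
# Sketch only: placeholder checkpoint id and settings, not from this PR.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-2-7b-hf"  # placeholder checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

inputs = tokenizer(
    "Simply put, the theory of relativity states that", return_tensors="pt"
).to(model.device)

# cache_implementation="quantized" swaps the default dynamic cache for a
# quantized KV cache; the "quanto" backend is what @require_quanto guards.
out = model.generate(
    **inputs,
    max_new_tokens=40,
    do_sample=False,
    cache_implementation="quantized",
    cache_config={"backend": "quanto", "nbits": 4},
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```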