Unverified commit 584eeb53, authored by Younes Belkada, committed by GitHub
Browse files

[`AutoGPTQ`] Add correct installation of GPTQ library + fix slow tests (#25713)

* add correct installation of GPTQ library

* update tests values
parent 2febd506
...@@ -50,7 +50,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef ...@@ -50,7 +50,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
RUN python3 -m pip install --no-cache-dir bitsandbytes RUN python3 -m pip install --no-cache-dir bitsandbytes
# Add auto-gptq for gptq quantization testing # Add auto-gptq for gptq quantization testing
RUN python3 -m pip install --no-cache-dir auto-gptq RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
# Add einops for additional model testing # Add einops for additional model testing
RUN python3 -m pip install --no-cache-dir einops RUN python3 -m pip install --no-cache-dir einops
......
...@@ -87,7 +87,8 @@ class GPTQTest(unittest.TestCase): ...@@ -87,7 +87,8 @@ class GPTQTest(unittest.TestCase):
EXPECTED_OUTPUTS = set() EXPECTED_OUTPUTS = set()
EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I") EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I")
EXPECTED_OUTPUTS.add("Hello my name is John and I am a very good looking man.") EXPECTED_OUTPUTS.add("Hello my name is John and I am a very good looking man.")
EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a professional photographer") EXPECTED_OUTPUTS.add("Hello my name is Alyson, I am a student in the")
EXPECTED_OUTPUTS.add("Hello my name is Alyson and I am a very sweet,")
# this seems a little small considering that we are doing 4bit quant but we have a small model and we don't quantize the embeddings # this seems a little small considering that we are doing 4bit quant but we have a small model and we don't quantize the embeddings
EXPECTED_RELATIVE_DIFFERENCE = 1.664253062 EXPECTED_RELATIVE_DIFFERENCE = 1.664253062
...@@ -215,7 +216,7 @@ class GPTQTest(unittest.TestCase): ...@@ -215,7 +216,7 @@ class GPTQTest(unittest.TestCase):
self.assertEqual(self.quantized_model.config.quantization_config.disable_exllama, True) self.assertEqual(self.quantized_model.config.quantization_config.disable_exllama, True)
# we need to put it directly to the gpu. Otherwise, we won't be able to initialize the exllama kernel # we need to put it directly to the gpu. Otherwise, we won't be able to initialize the exllama kernel
quantized_model_from_saved = AutoModelForCausalLM.from_pretrained( quantized_model_from_saved = AutoModelForCausalLM.from_pretrained(
tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=6), device_map={"": 0} tmpdirname, quantization_config=GPTQConfig(disable_exllama=False, bits=4), device_map={"": 0}
) )
self.assertEqual(quantized_model_from_saved.config.quantization_config.disable_exllama, False) self.assertEqual(quantized_model_from_saved.config.quantization_config.disable_exllama, False)
self.assertEqual(quantized_model_from_saved.config.quantization_config.bits, self.bits) self.assertEqual(quantized_model_from_saved.config.quantization_config.bits, self.bits)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment