Unverified commit f9b4bea0 authored by Alexander Kozlov, committed by GitHub

Added cache_block_outputs option to enable GPTQ for non-regular models (#27032)



* Added cache_block_outputs option to enable GPTQ for non-regular models

* Update src/transformers/utils/quantization_config.py
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Update src/transformers/utils/quantization_config.py
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Fixed style

* Update src/transformers/utils/quantization_config.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent 037fb7d0
@@ -360,6 +360,8 @@ class GPTQConfig(QuantizationConfigMixin):
max_input_length (`int`, *optional*):
The maximum input length. This is needed to initialize a buffer that depends on the maximum expected input
length. It is specific to the exllama backend with act-order.
cache_block_outputs (`bool`, *optional*, defaults to `True`):
Whether to cache block outputs to reuse as inputs for the succeeding block.
"""
def __init__(
@@ -380,6 +382,7 @@ class GPTQConfig(QuantizationConfigMixin):
pad_token_id: Optional[int] = None,
disable_exllama: bool = False,
max_input_length: Optional[int] = None,
cache_block_outputs: bool = True,
**kwargs,
):
self.quant_method = QuantizationMethod.GPTQ
@@ -399,6 +402,7 @@ class GPTQConfig(QuantizationConfigMixin):
self.pad_token_id = pad_token_id
self.disable_exllama = disable_exllama
self.max_input_length = max_input_length
self.cache_block_outputs = cache_block_outputs
self.post_init()
def get_loading_attributes(self):
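A minimal usage sketch of the new flag, assuming the standard `GPTQConfig` / `from_pretrained` quantization flow in transformers; the model id and calibration dataset below are illustrative placeholders, not part of this commit:

```python
# A minimal sketch, assuming the usual GPTQ quantization flow in transformers.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "facebook/opt-125m"  # placeholder model chosen for illustration
tokenizer = AutoTokenizer.from_pretrained(model_id)

gptq_config = GPTQConfig(
    bits=4,
    dataset="c4",               # calibration data for GPTQ
    tokenizer=tokenizer,
    cache_block_outputs=False,  # disable block-output caching for non-regular models
)

# Quantization runs while the model is loaded when a GPTQConfig is passed.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=gptq_config)
```

Leaving `cache_block_outputs` at its default of `True` keeps the existing behavior, where each block's outputs are cached and reused as inputs for the next block.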