Unverified Commit 4c062d44 authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

Update huggingface.py

parent 436edcc9
...@@ -162,7 +162,7 @@ class HuggingFaceAutoLM(BaseLM): ...@@ -162,7 +162,7 @@ class HuggingFaceAutoLM(BaseLM):
gptq_use_triton (bool, optional, defaults to False): gptq_use_triton (bool, optional, defaults to False):
Use Triton for GPTQ inference. Use Triton for GPTQ inference.
inject_fused_attention (bool, optional, defaults to True): inject_fused_attention (bool, optional, defaults to True):
Inject fused attention. Inject fused attention into GPTQ model.
bnb_4bit_quant_type (str, optional, defaults to None): bnb_4bit_quant_type (str, optional, defaults to None):
The quantization type to use for BnB 4bit quantization. See: The quantization type to use for BnB 4bit quantization. See:
https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L77 https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L77
...@@ -781,4 +781,4 @@ def stop_sequences_criteria( ...@@ -781,4 +781,4 @@ def stop_sequences_criteria(
for sequence in stop_sequences for sequence in stop_sequences
], ],
] ]
) )
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment