Unverified commit 4c062d44, authored by Hailey Schoelkopf and committed by GitHub
Browse files

Update huggingface.py

parent 436edcc9
......@@ -162,7 +162,7 @@ class HuggingFaceAutoLM(BaseLM):
gptq_use_triton (bool, optional, defaults to False):
Use Triton for GPTQ inference.
inject_fused_attention (bool, optional, defaults to True):
-Inject fused attention.
+Inject fused attention into GPTQ model.
bnb_4bit_quant_type (str, optional, defaults to None):
The quantization type to use for BnB 4bit quantization. See:
https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L77
......@@ -781,4 +781,4 @@ def stop_sequences_criteria(
for sequence in stop_sequences
],
]
-)
\ No newline at end of file
+)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment