Unverified Commit d1537059 authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #643 from gakada/master

Fix trust_remote_code, bnb_4bit_*, max_batch_size
parents 13014b2c a946c6cd
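
The three fixes summarized above are all user-facing model options. A rough sketch of exercising them through the evaluator (the model registry name, checkpoint, task, and the exact simple_evaluate signature are assumptions, not taken from this diff):

# Illustrative only: the argument names follow the parameters patched in this
# PR, but the entry point, registry name, checkpoint and task are assumptions.
from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-causal-experimental",      # assumed registry name for HuggingFaceAutoLM
    model_args=(
        "pretrained=tiiuae/falcon-7b,"
        "trust_remote_code=True,"        # now also reaches the tokenizer
        "load_in_4bit=True,"
        "bnb_4bit_quant_type=nf4,"       # forwarded only when explicitly set
        "bnb_4bit_compute_dtype=bfloat16"
    ),
    tasks=["lambada_openai"],
    batch_size="auto",
    max_batch_size=16,                   # cap for automatic batch-size search
)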
@@ -3,3 +3,5 @@ env
data/
lm_cache
.idea
*.egg-info/
@@ -309,7 +309,7 @@ class BaseLM(LM):
if override_bs is not None
else 0,
fn=_batch_scheduler
if self.batch_size == "auto" and n_reordered_requests > 0
if self.batch_size == "auto" and n_reordered_requests > 0 and not override_bs
else None,
):
inps = []
......
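
The hunk above stops the automatic batch scheduler from firing when an explicit batch-size override is passed in. A standalone sketch of the repaired guard, with names mirroring the diff (the real _batch_scheduler lives on BaseLM and does considerably more):

def pick_batch_fn(batch_size, n_reordered_requests, override_bs, _batch_scheduler):
    # Hand control to the automatic scheduler only when the user asked for
    # "auto" sizing, there is work to schedule, and no explicit override
    # batch size was supplied for this call.
    if batch_size == "auto" and n_reordered_requests > 0 and not override_bs:
        return _batch_scheduler
    return None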
@@ -80,6 +80,7 @@ def simple_evaluate(
lm = lm_eval.models.get_model("hf-causal")(
pretrained=model,
batch_size=batch_size,
max_batch_size=max_batch_size,
)
no_cache = True
else:
......
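
This hunk threads max_batch_size through the branch of simple_evaluate that wraps an already-instantiated transformers model, so the cap applies there as well. A hedged usage sketch (checkpoint and task name are placeholders; it assumes simple_evaluate exposes a max_batch_size parameter, as the forwarded variable suggests):

import transformers
from lm_eval import evaluator

# Passing a PreTrainedModel instance routes through the "hf-causal" wrapper
# patched above; batch_size="auto" now respects the max_batch_size ceiling.
model = transformers.AutoModelForCausalLM.from_pretrained("gpt2")
results = evaluator.simple_evaluate(
    model=model,
    tasks=["lambada_openai"],
    batch_size="auto",
    max_batch_size=32,
)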
@@ -29,13 +29,14 @@ class HFLM(BaseLM):
subfolder=None,
tokenizer=None,
batch_size=1,
max_batch_size=512,
max_length=None,
load_in_8bit: Optional[bool] = False,
trust_remote_code: Optional[bool] = False,
dtype: Optional[Union[str, torch.dtype]]="auto",
):
super().__init__()
# Initialize model
if isinstance(pretrained, transformers.PreTrainedModel):
@@ -106,10 +107,13 @@ class HFLM(BaseLM):
assert isinstance(batch_size, (int, str))
# setup for automatic batch size detection
if batch_size == "auto":
self.batch_size_per_gpu = batch_size
if str(batch_size).startswith("auto"):
batch_size = batch_size.split(":")
self.batch_size_per_gpu = batch_size[0]
self.batch_schedule = float(batch_size[1]) if len(batch_size) > 1 else 1
else:
self.batch_size_per_gpu = int(batch_size)
self.max_batch_size = max_batch_size
self._max_length = max_length
......
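
The constructor now accepts an "auto:<schedule>" spec in addition to plain "auto" or an integer. A minimal, self-contained re-implementation of the parsing added above (the function name is hypothetical):

def parse_batch_size(batch_size, default_schedule=1.0):
    # "auto" or "auto:<schedule>" keeps the literal "auto" and extracts the
    # optional schedule factor; anything else is treated as a fixed integer.
    if str(batch_size).startswith("auto"):
        parts = str(batch_size).split(":")
        per_gpu = parts[0]
        schedule = float(parts[1]) if len(parts) > 1 else default_schedule
    else:
        per_gpu = int(batch_size)
        schedule = default_schedule
    return per_gpu, schedule

assert parse_batch_size("auto:2") == ("auto", 2.0)
assert parse_batch_size(8) == (8, 1.0)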
@@ -154,7 +154,7 @@ class HuggingFaceAutoLM(BaseLM):
If True, will trust the remote code when loading the model.
gptq_use_triton (bool, optional, defaults to False):
Use Triton for GPTQ inference.
bnb_4bit_quant_type (str, optional, defaults to None):
The quantization type to use for BnB 4bit quantization. See:
https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L77
bnb_4bit_compute_dtype (Union[str, torch.dtype], optional, defaults to None):
@@ -279,8 +279,10 @@ class HuggingFaceAutoLM(BaseLM):
if transformers.__version__ >= "4.30.0":
model_kwargs["load_in_4bit"] = load_in_4bit
if load_in_4bit:
model_kwargs["bnb_4bit_quant_type"] = bnb_4bit_quant_type
model_kwargs["bnb_4bit_compute_dtype"] = getattr(torch, bnb_4bit_compute_dtype)
if bnb_4bit_quant_type:
model_kwargs["bnb_4bit_quant_type"] = bnb_4bit_quant_type
if bnb_4bit_compute_dtype:
model_kwargs["bnb_4bit_compute_dtype"] = _get_dtype(bnb_4bit_compute_dtype)
model = self.AUTO_MODEL_CLASS.from_pretrained(
pretrained,
revision=revision + ("/" + subfolder if subfolder is not None else ""),
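
With this change, the bnb_4bit_* kwargs are only forwarded when the caller actually sets them, and the compute dtype goes through the repository's _get_dtype helper so either a string or a torch.dtype is accepted. A standalone sketch of that behavior (the _get_dtype body here is a stand-in, not the repository's exact implementation):

from typing import Optional, Union
import torch

def _get_dtype(dtype: Union[str, torch.dtype]) -> torch.dtype:
    # Accept "bfloat16" as well as torch.bfloat16.
    return getattr(torch, dtype) if isinstance(dtype, str) else dtype

def four_bit_kwargs(load_in_4bit: bool,
                    quant_type: Optional[str],
                    compute_dtype: Optional[Union[str, torch.dtype]]) -> dict:
    kwargs = {"load_in_4bit": load_in_4bit}
    if load_in_4bit:
        # Only pass the bnb_4bit_* options when explicitly provided, so
        # transformers' own defaults are not clobbered with None.
        if quant_type:
            kwargs["bnb_4bit_quant_type"] = quant_type
        if compute_dtype:
            kwargs["bnb_4bit_compute_dtype"] = _get_dtype(compute_dtype)
    return kwargs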
@@ -331,7 +333,7 @@ class HuggingFaceAutoLM(BaseLM):
revision: str,
subfolder: str,
tokenizer: Optional[str] = None,
trust_remote_code: bool = False,
trust_remote_code: Optional[bool] = False,
) -> transformers.PreTrainedTokenizer:
"""Returns a pre-trained tokenizer from a pre-trained tokenizer configuration."""
tokenizer = self.AUTO_TOKENIZER_CLASS.from_pretrained(
@@ -501,12 +503,14 @@ class AutoCausalLM(HuggingFaceAutoLM):
revision: str,
subfolder: str,
tokenizer: Optional[str] = None,
trust_remote_code: Optional[bool] = False,
) -> transformers.PreTrainedTokenizer:
tokenizer = super()._create_auto_tokenizer(
pretrained=pretrained,
revision=revision,
subfolder=subfolder,
tokenizer=tokenizer,
trust_remote_code=trust_remote_code,
)
tokenizer.padding_side = "left"
return tokenizer
......
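
The last two hunks make AutoCausalLM pass trust_remote_code through to tokenizer construction, so models whose tokenizers ship custom code load correctly. A hedged sketch of the resulting behavior (the function name is hypothetical; the real code goes through self.AUTO_TOKENIZER_CLASS and also handles revision and subfolder):

import transformers

def create_left_padded_tokenizer(pretrained: str, trust_remote_code: bool = False):
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        pretrained,
        trust_remote_code=trust_remote_code,   # previously dropped before this fix
    )
    tokenizer.padding_side = "left"            # causal LMs batch-pad on the left
    return tokenizer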