Unverified Commit d1537059 authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #643 from gakada/master

Fix trust_remote_code, bnb_4bit_*, max_batch_size
parents 13014b2c a946c6cd
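
The three fixes summarized above are all user-facing model options. A rough sketch of exercising them through the evaluator (the model registry name, checkpoint, task, and the exact simple_evaluate signature are assumptions, not taken from this diff):

# Illustrative only: the argument names follow the parameters patched in this
# PR, but the entry point, registry name, checkpoint and task are assumptions.
from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-causal-experimental",      # assumed registry name for HuggingFaceAutoLM
    model_args=(
        "pretrained=tiiuae/falcon-7b,"
        "trust_remote_code=True,"        # now also reaches the tokenizer
        "load_in_4bit=True,"
        "bnb_4bit_quant_type=nf4,"       # forwarded only when explicitly set
        "bnb_4bit_compute_dtype=bfloat16"
    ),
    tasks=["lambada_openai"],
    batch_size="auto",
    max_batch_size=16,                   # cap for automatic batch-size search
)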
@@ -3,3 +3,5 @@ env
data/
lm_cache
.idea
*.egg-info/
@@ -309,7 +309,7 @@ class BaseLM(LM):
if override_bs is not None
else 0,
fn=_batch_scheduler
if self.batch_size == "auto" and n_reordered_requests > 0
if self.batch_size == "auto" and n_reordered_requests > 0 and not override_bs
else None,
):
inps = []
......
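
The hunk above stops the automatic batch scheduler from firing when an explicit batch-size override is passed in. A standalone sketch of the repaired guard, with names mirroring the diff (the real _batch_scheduler lives on BaseLM and does considerably more):

def pick_batch_fn(batch_size, n_reordered_requests, override_bs, _batch_scheduler):
    # Hand control to the automatic scheduler only when the user asked for
    # "auto" sizing, there is work to schedule, and no explicit override
    # batch size was supplied for this call.
    if batch_size == "auto" and n_reordered_requests > 0 and not override_bs:
        return _batch_scheduler
    return None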
@@ -80,6 +80,7 @@ def simple_evaluate(
lm = lm_eval.models.get_model("hf-causal")(
pretrained=model,
batch_size=batch_size,
max_batch_size=max_batch_size,
)
no_cache = True
else:
......
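
This hunk threads max_batch_size through the branch of simple_evaluate that wraps an already-instantiated transformers model, so the cap applies there as well. A hedged usage sketch (checkpoint and task name are placeholders; it assumes simple_evaluate exposes a max_batch_size parameter, as the forwarded variable suggests):

import transformers
from lm_eval import evaluator

# Passing a PreTrainedModel instance routes through the "hf-causal" wrapper
# patched above; batch_size="auto" now respects the max_batch_size ceiling.
model = transformers.AutoModelForCausalLM.from_pretrained("gpt2")
results = evaluator.simple_evaluate(
    model=model,
    tasks=["lambada_openai"],
    batch_size="auto",
    max_batch_size=32,
)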
@@ -29,13 +29,14 @@ class HFLM(BaseLM):
subfolder=None,
tokenizer=None,
batch_size=1,
max_batch_size=512,
max_length=None,
load_in_8bit: Optional[bool] = False,
trust_remote_code: Optional[bool] = False,
dtype: Optional[Union[str, torch.dtype]]="auto",
):
super().__init__()
# Initialize model
if isinstance(pretrained, transformers.PreTrainedModel):
@@ -106,10 +107,13 @@ class HFLM(BaseLM):
assert isinstance(batch_size, (int, str))
# setup for automatic batch size detection
if batch_size == "auto":
self.batch_size_per_gpu = batch_size
if str(batch_size).startswith("auto"):
batch_size = batch_size.split(":")
self.batch_size_per_gpu = batch_size[0]
self.batch_schedule = float(batch_size[1]) if len(batch_size) > 1 else 1
else:
self.batch_size_per_gpu = int(batch_size)
self.max_batch_size = max_batch_size
self._max_length = max_length
......
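
The constructor now accepts an "auto:<schedule>" spec in addition to plain "auto" or an integer. A minimal, self-contained re-implementation of the parsing added above (the function name is hypothetical):

def parse_batch_size(batch_size, default_schedule=1.0):
    # "auto" or "auto:<schedule>" keeps the literal "auto" and extracts the
    # optional schedule factor; anything else is treated as a fixed integer.
    if str(batch_size).startswith("auto"):
        parts = str(batch_size).split(":")
        per_gpu = parts[0]
        schedule = float(parts[1]) if len(parts) > 1 else default_schedule
    else:
        per_gpu = int(batch_size)
        schedule = default_schedule
    return per_gpu, schedule

assert parse_batch_size("auto:2") == ("auto", 2.0)
assert parse_batch_size(8) == (8, 1.0)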
@@ -154,7 +154,7 @@ class HuggingFaceAutoLM(BaseLM):
If True, will trust the remote code when loading the model.
gptq_use_triton (bool, optional, defaults to False):
Use Triton for GPTQ inference.
bnb_4bit_quant_type (str, optional, defaults to None):
The quantization type to use for BnB 4bit quantization. See:
https://github.com/huggingface/transformers/blob/main/src/transformers/utils/quantization_config.py#L77
bnb_4bit_compute_dtype (Union[str, torch.dtype], optional, defaults to None):
@@ -279,8 +279,10 @@ class HuggingFaceAutoLM(BaseLM):
if transformers.__version__ >= "4.30.0":
model_kwargs["load_in_4bit"] = load_in_4bit
if load_in_4bit:
model_kwargs["bnb_4bit_quant_type"] = bnb_4bit_quant_type
model_kwargs["bnb_4bit_compute_dtype"] = getattr(torch, bnb_4bit_compute_dtype)
if bnb_4bit_quant_type:
model_kwargs["bnb_4bit_quant_type"] = bnb_4bit_quant_type
if bnb_4bit_compute_dtype:
model_kwargs["bnb_4bit_compute_dtype"] = _get_dtype(bnb_4bit_compute_dtype)
model = self.AUTO_MODEL_CLASS.from_pretrained(
pretrained,
revision=revision + ("/" + subfolder if subfolder is not None else ""),
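
With this change, the bnb_4bit_* kwargs are only forwarded when the caller actually sets them, and the compute dtype goes through the repository's _get_dtype helper so either a string or a torch.dtype is accepted. A standalone sketch of that behavior (the _get_dtype body here is a stand-in, not the repository's exact implementation):

from typing import Optional, Union
import torch

def _get_dtype(dtype: Union[str, torch.dtype]) -> torch.dtype:
    # Accept "bfloat16" as well as torch.bfloat16.
    return getattr(torch, dtype) if isinstance(dtype, str) else dtype

def four_bit_kwargs(load_in_4bit: bool,
                    quant_type: Optional[str],
                    compute_dtype: Optional[Union[str, torch.dtype]]) -> dict:
    kwargs = {"load_in_4bit": load_in_4bit}
    if load_in_4bit:
        # Only pass the bnb_4bit_* options when explicitly provided, so
        # transformers' own defaults are not clobbered with None.
        if quant_type:
            kwargs["bnb_4bit_quant_type"] = quant_type
        if compute_dtype:
            kwargs["bnb_4bit_compute_dtype"] = _get_dtype(compute_dtype)
    return kwargs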
@@ -331,7 +333,7 @@ class HuggingFaceAutoLM(BaseLM):
revision: str,
subfolder: str,
tokenizer: Optional[str] = None,
trust_remote_code: bool = False,
trust_remote_code: Optional[bool] = False,
) -> transformers.PreTrainedTokenizer:
"""Returns a pre-trained tokenizer from a pre-trained tokenizer configuration."""
tokenizer = self.AUTO_TOKENIZER_CLASS.from_pretrained(
@@ -501,12 +503,14 @@ class AutoCausalLM(HuggingFaceAutoLM):
revision: str,
subfolder: str,
tokenizer: Optional[str] = None,
trust_remote_code: Optional[bool] = False,
) -> transformers.PreTrainedTokenizer:
tokenizer = super()._create_auto_tokenizer(
pretrained=pretrained,
revision=revision,
subfolder=subfolder,
tokenizer=tokenizer,
trust_remote_code=trust_remote_code,
)
tokenizer.padding_side = "left"
return tokenizer
......
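
The last two hunks make AutoCausalLM pass trust_remote_code through to tokenizer construction, so models whose tokenizers ship custom code load correctly. A hedged sketch of the resulting behavior (the function name is hypothetical; the real code goes through self.AUTO_TOKENIZER_CLASS and also handles revision and subfolder):

import transformers

def create_left_padded_tokenizer(pretrained: str, trust_remote_code: bool = False):
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        pretrained,
        trust_remote_code=trust_remote_code,   # previously dropped before this fix
    )
    tokenizer.padding_side = "left"            # causal LMs batch-pad on the left
    return tokenizer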