Unverified Commit 31895e5b authored by Avelina Asada Hadji-Kyriacou's avatar Avelina Asada Hadji-Kyriacou Committed by GitHub
Browse files

Added mixed_precision_dtype arg (#3138)

parent 2ea6114e
...@@ -76,6 +76,7 @@ class HFLM(TemplateLM): ...@@ -76,6 +76,7 @@ class HFLM(TemplateLM):
device: Optional[str] = "cuda", device: Optional[str] = "cuda",
dtype: Optional[Union[str, torch.dtype]] = "auto", dtype: Optional[Union[str, torch.dtype]] = "auto",
softmax_dtype: Optional[Union[str, torch.dtype]] = None, softmax_dtype: Optional[Union[str, torch.dtype]] = None,
mixed_precision_dtype: Optional[Union[str, torch.dtype]] = None,
batch_size: Optional[Union[int, str]] = 1, batch_size: Optional[Union[int, str]] = 1,
max_batch_size: Optional[int] = 64, max_batch_size: Optional[int] = 64,
trust_remote_code: Optional[bool] = False, trust_remote_code: Optional[bool] = False,
...@@ -247,6 +248,11 @@ class HFLM(TemplateLM): ...@@ -247,6 +248,11 @@ class HFLM(TemplateLM):
self.softmax_dtype = ( self.softmax_dtype = (
get_dtype(softmax_dtype) if softmax_dtype is not None else None get_dtype(softmax_dtype) if softmax_dtype is not None else None
) )
self.mixed_precision_dtype = (
get_dtype(mixed_precision_dtype)
if mixed_precision_dtype is not None
else None
)
if str(batch_size).startswith("auto"): if str(batch_size).startswith("auto"):
batch_size = batch_size.split(":") batch_size = batch_size.split(":")
...@@ -903,6 +909,11 @@ class HFLM(TemplateLM): ...@@ -903,6 +909,11 @@ class HFLM(TemplateLM):
logits returned from the model's decoder logits returned from the model's decoder
""" """
with torch.no_grad(): with torch.no_grad():
with torch.autocast(
device_type=self.device.type,
dtype=self.mixed_precision_dtype,
enabled=self.mixed_precision_dtype is not None,
):
if attn_mask is not None or labels is not None: if attn_mask is not None or labels is not None:
assert attn_mask is not None and labels is not None assert attn_mask is not None and labels is not None
assert self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM assert self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM
...@@ -934,6 +945,11 @@ class HFLM(TemplateLM): ...@@ -934,6 +945,11 @@ class HFLM(TemplateLM):
stopping_criteria = stop_sequences_criteria( stopping_criteria = stop_sequences_criteria(
self.tokenizer, stop, context.shape[1], context.shape[0] self.tokenizer, stop, context.shape[1], context.shape[0]
) )
with torch.autocast(
device_type=self.device.type,
dtype=self.mixed_precision_dtype,
enabled=self.mixed_precision_dtype is not None,
):
return self.model.generate( return self.model.generate(
input_ids=context, input_ids=context,
max_length=max_length, max_length=max_length,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment