Unverified commit 206b7722 authored by Cyrus Leung, committed by GitHub

Fix `add_bos_token` not updated for Gemma tokenizer (#3206)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 3bc7cc8a
@@ -195,6 +195,12 @@ class VLLM(TemplateLM):
         self.batch_size = "auto"
         eval_logger.info("Manual batching is not compatible with data parallelism.")
+        if "gemma" in pretrained.lower():
+            add_bos_token = True
+            eval_logger.info(
+                "Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
+            )
         from transformers import AutoConfig
         self._config = AutoConfig.from_pretrained(
@@ -213,11 +219,6 @@ class VLLM(TemplateLM):
             "enable_thinking", enable_thinking
         )
         self.add_bos_token = add_bos_token
-        if "gemma" in pretrained.lower():
-            self.add_bos_token = True
-            eval_logger.info(
-                "Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
-            )
         if parse_version(version("vllm")) >= parse_version("0.8.3"):
             kwargs_resolve_hf_chat_template = {
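Taken together, the two hunks move the Gemma override from after `self.add_bos_token = add_bos_token` to before the `transformers` config and tokenizer setup, so the local `add_bos_token` flag is already `True` by the time it is consumed during initialization. Below is a minimal sketch of that ordering pitfall, not the actual lm-eval code path; the `build_tokenizer` helper, its arguments, and the direct `AutoTokenizer` usage are illustrative assumptions:

```python
from transformers import AutoTokenizer


def build_tokenizer(pretrained: str, add_bos_token: bool = False):
    # New placement: flip the local flag first, so everything constructed
    # below already sees the corrected value.
    if "gemma" in pretrained.lower():
        add_bos_token = True  # Gemma series models underperform without a BOS token.

    tokenizer = AutoTokenizer.from_pretrained(pretrained)
    # Some tokenizers expose `add_bos_token` as a mutable attribute; applying
    # the flag here, during setup, is what the old ordering missed.
    if hasattr(tokenizer, "add_bos_token"):
        tokenizer.add_bos_token = add_bos_token
    return tokenizer, add_bos_token


# Old placement (the removed hunk): patching the wrapper attribute after
# construction never reaches consumers that already read the stale flag:
#
#     self.add_bos_token = add_bos_token
#     if "gemma" in pretrained.lower():
#         self.add_bos_token = True  # too late for anything initialized above
```

With the new ordering, a Gemma checkpoint ends up with `add_bos_token=True` regardless of the value the caller passed, matching the log message added in the first hunk.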