Unverified Commit 66914f7b authored by SeongBeomLEE, committed by GitHub

fix: LlamaTokenizerFast to AutoTokenizer at flash_mistral.py (#1637)

# What does this PR do?

There are a few cases where a model uses the Mistral or Mixtral architecture but not a Llama tokenizer, so this PR falls back to AutoTokenizer via exception handling.
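
A minimal, standalone sketch of the fallback pattern (the `load_mistral_tokenizer` helper name is illustrative, not part of this PR):

```python
from transformers import AutoTokenizer
from transformers.models.llama import LlamaTokenizerFast


def load_mistral_tokenizer(model_id, revision=None, trust_remote_code=False):
    # Most Mistral/Mixtral checkpoints ship a Llama tokenizer, so try the
    # fast Llama tokenizer first.
    try:
        return LlamaTokenizerFast.from_pretrained(
            model_id,
            revision=revision,
            padding_side="left",
            truncation_side="left",
            trust_remote_code=trust_remote_code,
        )
    except Exception:
        # For checkpoints that use a different tokenizer, AutoTokenizer
        # resolves the correct tokenizer class from the checkpoint's config.
        return AutoTokenizer.from_pretrained(
            model_id,
            revision=revision,
            padding_side="left",
            truncation_side="left",
            trust_remote_code=trust_remote_code,
        )
```

Since both branches pass identical arguments, the fallback changes only which tokenizer class is tried, not how the tokenizer is configured.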

Similar PR #619

@Narsil
parent 08e91814
@@ -6,7 +6,7 @@ import numpy as np
 from dataclasses import dataclass
 from opentelemetry import trace
-from transformers import PreTrainedTokenizerBase
+from transformers import PreTrainedTokenizerBase, AutoTokenizer
 from transformers.models.llama import LlamaTokenizerFast
 from typing import Optional, Tuple, Type
@@ -317,13 +317,22 @@ class BaseFlashMistral(FlashCausalLM):
         else:
             raise NotImplementedError("FlashMistral is only available on GPU")

-        tokenizer = LlamaTokenizerFast.from_pretrained(
-            model_id,
-            revision=revision,
-            padding_side="left",
-            truncation_side="left",
-            trust_remote_code=trust_remote_code,
-        )
+        try:
+            tokenizer = LlamaTokenizerFast.from_pretrained(
+                model_id,
+                revision=revision,
+                padding_side="left",
+                truncation_side="left",
+                trust_remote_code=trust_remote_code,
+            )
+        except Exception:
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_id,
+                revision=revision,
+                padding_side="left",
+                truncation_side="left",
+                trust_remote_code=trust_remote_code,
+            )

         config = config_cls.from_pretrained(
             model_id, revision=revision, trust_remote_code=trust_remote_code