Fixing cohere tokenizer. (#1697)

f9958ee1 · Nicolas Patry · GitHub · 5062fda4 · f9958ee1
Unverified commit f9958ee1, authored Apr 05, 2024 by Nicolas Patry; committed by GitHub on Apr 05, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

server/text_generation_server/models/flash_cohere.py (+2 -2)

No files found.
--- a/server/text_generation_server/models/flash_cohere.py
+++ b/server/text_generation_server/models/flash_cohere.py
@@ -3,7 +3,7 @@ import torch.distributed

 from opentelemetry import trace
 from typing import Optional
-from transformers.models.llama import LlamaTokenizerFast
+from transformers import AutoTokenizer

 from text_generation_server.models import FlashCausalLM
 from text_generation_server.models.custom_modeling.flash_cohere_modeling import (
@@ -36,7 +36,7 @@ class FlashCohere(FlashCausalLM):
        else:
            raise NotImplementedError("FlashCohere is only available on GPU")

-        tokenizer = LlamaTokenizerFast.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            revision=revision,
            padding_side="left",