Unverified Commit df5e4232 authored by Yassine's avatar Yassine Committed by GitHub
Browse files

fix: create a copy for tokenizer object (#18408)

parent 24845aeb
@@ -16,6 +16,7 @@
 Tokenization classes for fast tokenizers (provided by HuggingFace's tokenizers library). For slow (python) tokenizers
 see tokenization_utils.py
 """
+import copy
 import json
 import os
 from collections import defaultdict
@@ -104,7 +105,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizerBase):
         )
         if tokenizer_object is not None:
-            fast_tokenizer = tokenizer_object
+            fast_tokenizer = copy.deepcopy(tokenizer_object)
         elif fast_tokenizer_file is not None and not from_slow:
             # We have a serialization from tokenizers which let us directly build the backend
             fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment