Commit e9f1af36 authored by windows_on_wsl's avatar windows_on_wsl
Browse files

add trust_remote_code to tokenizer.from_pretrained

parent bc10a390
...@@ -203,6 +203,7 @@ class HuggingFaceAutoLM(BaseLM): ...@@ -203,6 +203,7 @@ class HuggingFaceAutoLM(BaseLM):
revision=revision, revision=revision,
subfolder=subfolder, subfolder=subfolder,
tokenizer=tokenizer, tokenizer=tokenizer,
trust_remote_code=trust_remote_code,
) )
self.tokenizer.model_max_length = self.max_length self.tokenizer.model_max_length = self.max_length
...@@ -330,11 +331,13 @@ class HuggingFaceAutoLM(BaseLM): ...@@ -330,11 +331,13 @@ class HuggingFaceAutoLM(BaseLM):
revision: str, revision: str,
subfolder: str, subfolder: str,
tokenizer: Optional[str] = None, tokenizer: Optional[str] = None,
trust_remote_code: bool = False,
) -> transformers.PreTrainedTokenizer: ) -> transformers.PreTrainedTokenizer:
"""Returns a pre-trained tokenizer from a pre-trained tokenizer configuration.""" """Returns a pre-trained tokenizer from a pre-trained tokenizer configuration."""
tokenizer = self.AUTO_TOKENIZER_CLASS.from_pretrained( tokenizer = self.AUTO_TOKENIZER_CLASS.from_pretrained(
pretrained if tokenizer is None else tokenizer, pretrained if tokenizer is None else tokenizer,
revision=revision + ("/" + subfolder if subfolder is not None else ""), revision=revision + ("/" + subfolder if subfolder is not None else ""),
trust_remote_code=trust_remote_code,
) )
tokenizer.pad_token = tokenizer.eos_token tokenizer.pad_token = tokenizer.eos_token
return tokenizer return tokenizer
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment