Unverified commit 07bd7e23, authored by Baber Abbasi, committed by GitHub
Browse files

initialize tokenizer with bos_token (#2781)

parent ebb498e4
...@@ -184,6 +184,7 @@ class HFLM(TemplateLM): ...@@ -184,6 +184,7 @@ class HFLM(TemplateLM):
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,
use_fast_tokenizer=use_fast_tokenizer, use_fast_tokenizer=use_fast_tokenizer,
gguf_file=gguf_file, gguf_file=gguf_file,
add_bos_token=add_bos_token,
) )
# if we passed `pretrained` as a string, initialize our model now # if we passed `pretrained` as a string, initialize our model now
...@@ -688,6 +689,7 @@ class HFLM(TemplateLM): ...@@ -688,6 +689,7 @@ class HFLM(TemplateLM):
trust_remote_code: Optional[bool] = False, trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True, use_fast_tokenizer: Optional[bool] = True,
gguf_file: Optional[str] = None, gguf_file: Optional[str] = None,
add_bos_token: Optional[bool] = False,
) -> None: ) -> None:
""" """
Helper method during initialization. Helper method during initialization.
...@@ -706,6 +708,9 @@ class HFLM(TemplateLM): ...@@ -706,6 +708,9 @@ class HFLM(TemplateLM):
else: else:
kwargs["use_fast"] = use_fast_tokenizer kwargs["use_fast"] = use_fast_tokenizer
if add_bos_token:
kwargs["add_bos_token"] = True
if tokenizer: if tokenizer:
if isinstance(tokenizer, str): if isinstance(tokenizer, str):
self.tokenizer = transformers.AutoTokenizer.from_pretrained( self.tokenizer = transformers.AutoTokenizer.from_pretrained(
......
...@@ -123,6 +123,7 @@ class VLLM(TemplateLM): ...@@ -123,6 +123,7 @@ class VLLM(TemplateLM):
tokenizer_mode=tokenizer_mode, tokenizer_mode=tokenizer_mode,
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,
revision=tokenizer_revision, revision=tokenizer_revision,
add_bos_token=add_bos_token,
) )
self.tokenizer = configure_pad_token(self.tokenizer) self.tokenizer = configure_pad_token(self.tokenizer)
self.add_bos_token = add_bos_token self.add_bos_token = add_bos_token
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment