f"Tokenizer has 'add_bos_token' attribute set -- using BOS token based on tokenizer configuration for model type '{self.config.model_type}'. To control explicitly, set `add_bos_token=True|False`"
)
else:
self.add_bos_token=False
self._max_length=max_length
self._max_length=max_length
self.pretrained=pretrained
self.pretrained=pretrained
...
@@ -748,7 +740,7 @@ class HFLM(TemplateLM):
...
@@ -748,7 +740,7 @@ class HFLM(TemplateLM):
trust_remote_code:bool|None=False,
trust_remote_code:bool|None=False,
use_fast_tokenizer:bool|None=True,
use_fast_tokenizer:bool|None=True,
gguf_file:str|None=None,
gguf_file:str|None=None,
add_bos_token:bool|None=False,
add_bos_token:bool|None=None,
subfolder:str|None="",
subfolder:str|None="",
)->None:
)->None:
"""Helper method during initialization.
"""Helper method during initialization.
...
@@ -767,8 +759,8 @@ class HFLM(TemplateLM):
...
@@ -767,8 +759,8 @@ class HFLM(TemplateLM):
else:
else:
kwargs["use_fast"]=use_fast_tokenizer
kwargs["use_fast"]=use_fast_tokenizer
ifadd_bos_token:
ifadd_bos_tokenisnotNone:
kwargs["add_bos_token"]=True
kwargs["add_bos_token"]=add_bos_token
ifsubfolder:
ifsubfolder:
kwargs["subfolder"]=subfolder
kwargs["subfolder"]=subfolder
...
@@ -868,16 +860,12 @@ class HFLM(TemplateLM):
...
@@ -868,16 +860,12 @@ class HFLM(TemplateLM):
)->list[int]:
)->list[int]:
# default for None - empty dict, use predefined tokenizer param
# default for None - empty dict, use predefined tokenizer param
# used for all models except for CausalLM or predefined value
# used for all models except for CausalLM or predefined value
special_tokens_kwargs:dict=(
{
special_tokens_kwargs=(
"add_special_tokens":self.add_bos_token
{"add_special_tokens":add_special_tokens}
ifadd_special_tokensisNone
if(isinstance(add_special_tokens,bool))
elseadd_special_tokens
else{"add_special_tokens":self.add_bos_token}
}
ifself.add_bos_tokenisnotNone
ifself.backend=="causal"
# otherwise the method explicitly defines the value