Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
d701d50f
Commit
d701d50f
authored
Oct 15, 2025
by
Baber
Browse files
fix bos token handling
parent
aab23be4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
23 deletions
+13
-23
lm_eval/models/huggingface.py
lm_eval/models/huggingface.py
+13
-23
No files found.
lm_eval/models/huggingface.py
View file @
d701d50f
...
...
@@ -258,15 +258,7 @@ class HFLM(TemplateLM):
else
{}
)
self
.
add_bos_token
=
add_bos_token
if
self
.
add_bos_token
is
None
:
if
getattr
(
self
.
tokenizer
,
"add_bos_token"
,
False
):
self
.
add_bos_token
=
True
eval_logger
.
info
(
f
"Tokenizer has 'add_bos_token' attribute set -- using BOS token based on tokenizer configuration for model type '
{
self
.
config
.
model_type
}
'. To control explicitly, set `add_bos_token=True|False`"
)
else
:
self
.
add_bos_token
=
False
self
.
add_bos_token
=
add_bos_token
if
add_bos_token
is
not
None
else
None
self
.
_max_length
=
max_length
self
.
pretrained
=
pretrained
...
...
@@ -748,7 +740,7 @@ class HFLM(TemplateLM):
trust_remote_code
:
bool
|
None
=
False
,
use_fast_tokenizer
:
bool
|
None
=
True
,
gguf_file
:
str
|
None
=
None
,
add_bos_token
:
bool
|
None
=
False
,
add_bos_token
:
bool
|
None
=
None
,
subfolder
:
str
|
None
=
""
,
)
->
None
:
"""Helper method during initialization.
...
...
@@ -767,8 +759,8 @@ class HFLM(TemplateLM):
else
:
kwargs
[
"use_fast"
]
=
use_fast_tokenizer
if
add_bos_token
:
kwargs
[
"add_bos_token"
]
=
True
if
add_bos_token
is
not
None
:
kwargs
[
"add_bos_token"
]
=
add_bos_token
if
subfolder
:
kwargs
[
"subfolder"
]
=
subfolder
...
...
@@ -868,16 +860,12 @@ class HFLM(TemplateLM):
)
->
list
[
int
]:
# default for None - empty dict, use predefined tokenizer param
# used for all models except for CausalLM or predefined value
special_tokens_kwargs
:
dict
=
(
{
"add_special_tokens"
:
self
.
add_bos_token
if
add_special_tokens
is
None
else
add_special_tokens
}
if
self
.
backend
==
"causal"
# otherwise the method explicitly defines the value
else
{
"add_special_tokens"
:
add_special_tokens
}
if
isinstance
(
add_special_tokens
,
bool
)
special_tokens_kwargs
=
(
{
"add_special_tokens"
:
add_special_tokens
}
if
(
isinstance
(
add_special_tokens
,
bool
))
else
{
"add_special_tokens"
:
self
.
add_bos_token
}
if
self
.
add_bos_token
is
not
None
else
{}
)
...
...
@@ -906,8 +894,10 @@ class HFLM(TemplateLM):
strings
[
0
],
getattr
(
self
.
tokenizer
,
"bos_token"
,
None
)
):
add_special_tokens
=
{
"add_special_tokens"
:
False
}
elif
self
.
add_bos_token
is
not
None
:
add_special_tokens
=
{
"add_special_tokens"
:
self
.
add_bos_token
}
else
:
add_special_tokens
=
{
"add_special_tokens"
:
False
or
self
.
add_bos_token
}
add_special_tokens
=
{
"add_special_tokens"
:
True
}
encoding
=
self
.
tokenizer
(
strings
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment