vllm · Commit 337871c6 (unverified)

Enable LLaMA fast tokenizer (#132)

Authored May 28, 2023 by Woosuk Kwon; committed by GitHub on May 28, 2023.
Parent commit: 56b7f0ef
Showing 2 changed files with 11 additions and 7 deletions:

  cacheflow/sampling_params.py         (+1, -1)
  cacheflow/server/tokenizer_utils.py  (+10, -6)
cacheflow/sampling_params.py

@@ -129,7 +129,7 @@ class SamplingParams:
             f"frequency_penalty={self.frequency_penalty}, "
             f"temperature={self.temperature}, "
             f"top_p={self.top_p}, "
-            f"top_k={self.top_k},"
+            f"top_k={self.top_k}, "
             f"use_beam_search={self.use_beam_search}, "
             f"stop={self.stop}, "
             f"ignore_eos={self.ignore_eos}, "
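The change above is a one-character fix: the top_k field was missing the space after its comma, so the repr ran two fields together. A quick illustration with made-up field values (not taken from the commit):

    >>> f"top_k={-1}," + f"use_beam_search={False}, "    # before the fix
    'top_k=-1,use_beam_search=False, '
    >>> f"top_k={-1}, " + f"use_beam_search={False}, "   # after the fix
    'top_k=-1, use_beam_search=False, '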
cacheflow/server/tokenizer_utils.py

@@ -7,11 +7,7 @@ from cacheflow.logger import init_logger

 logger = init_logger(__name__)

-_MODEL_TYPES_WITH_SLOW_TOKENIZER = [
-    # LLaMA fast tokenizer has a bug related to protobuf.
-    # See https://github.com/WoosukKwon/cacheflow/issues/80#issue-1698550554
-    "llama",
-]
+_MODEL_TYPES_WITH_SLOW_TOKENIZER = []

 def get_tokenizer(
@@ -20,7 +16,15 @@ def get_tokenizer(
     **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     config = AutoConfig.from_pretrained(model_name)
-    if config.model_type in _MODEL_TYPES_WITH_SLOW_TOKENIZER:
+    if config.model_type == "llama" and getattr(kwargs, "use_fast", True):
+        # LLaMA fast tokenizer causes protobuf errors in some environments.
+        # However, we found that the below LLaMA fast tokenizer works well in
+        # most environments.
+        model_name = "hf-internal-testing/llama-tokenizer"
+        logger.info(
+            f"Using the LLaMA fast tokenizer in '{model_name}' to avoid "
+            "potential protobuf errors.")
+    elif config.model_type in _MODEL_TYPES_WITH_SLOW_TOKENIZER:
         if getattr(kwargs, "use_fast", False) == True:
             raise ValueError(
                 f"Cannot use the fast tokenizer for {config.model_type} due to "
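In effect, for LLaMA models get_tokenizer() now silently redirects loading to the known-good fast tokenizer published under hf-internal-testing/llama-tokenizer (the repo named in the diff above) instead of forcing the slow tokenizer. A minimal sketch of the resulting behavior, using the transformers AutoTokenizer API directly rather than the cacheflow wrapper:

    from transformers import AutoTokenizer, PreTrainedTokenizerFast

    # The repo name is taken from the diff above; loading it yields a
    # Rust-backed fast tokenizer, so tokenization and detokenization for
    # LLaMA models stay on the fast path.
    tokenizer = AutoTokenizer.from_pretrained(
        "hf-internal-testing/llama-tokenizer")
    assert isinstance(tokenizer, PreTrainedTokenizerFast)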