Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a74dee9b
Unverified
Commit
a74dee9b
authored
Apr 26, 2024
by
Cyrus Leung
Committed by
GitHub
Apr 25, 2024
Browse files
[Bugfix] Fix parameter name in `get_tokenizer` (#4107)
parent
cf29b7ed
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
5 deletions
+26
-5
tests/tokenization/test_tokenizer.py
tests/tokenization/test_tokenizer.py
+20
-0
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+6
-5
No files found.
tests/tokenization/test_tokenizer.py
0 → 100644
View file @
a74dee9b
import
pytest
from
transformers
import
PreTrainedTokenizerBase
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
# Hugging Face repository ids used to parametrize the tokenizer tests below.
TOKENIZER_NAMES = ["facebook/opt-125m", "gpt2"]
@pytest.mark.parametrize("tokenizer_name", TOKENIZER_NAMES)
def test_tokenizer_revision(tokenizer_name: str):
    """Exercise the ``revision`` keyword of ``get_tokenizer``.

    Runs once per entry in ``TOKENIZER_NAMES``. With ``revision="main"`` the
    call must return a ``PreTrainedTokenizerBase`` instance; with
    ``revision="never"`` it must raise ``OSError`` whose message matches
    ``'not a valid git identifier'``.
    """
    # The "main" branch is assumed to exist for every listed repository.
    loaded = get_tokenizer(tokenizer_name, revision="main")
    assert isinstance(loaded, PreTrainedTokenizerBase)

    # "never" is assumed not to name any existing branch, so loading must fail.
    expect_bad_revision = pytest.raises(OSError,
                                        match='not a valid git identifier')
    with expect_bad_revision:
        get_tokenizer(tokenizer_name, revision="never")
vllm/transformers_utils/tokenizer.py
View file @
a74dee9b
...
...
@@ -58,11 +58,12 @@ def get_tokenizer(
*
args
,
tokenizer_mode
:
str
=
"auto"
,
trust_remote_code
:
bool
=
False
,
tokenizer_
revision
:
Optional
[
str
]
=
None
,
revision
:
Optional
[
str
]
=
None
,
download_dir
:
Optional
[
str
]
=
None
,
**
kwargs
,
)
->
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
]:
"""Gets a tokenizer for the given model name via Huggingface/modelscope."""
"""Gets a tokenizer for the given model name via HuggingFace or ModelScope.
"""
if
VLLM_USE_MODELSCOPE
:
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
...
...
@@ -74,7 +75,7 @@ def get_tokenizer(
tokenizer_path
=
snapshot_download
(
model_id
=
tokenizer_name
,
cache_dir
=
download_dir
,
revision
=
tokenizer_
revision
,
revision
=
revision
,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern
=
[
"*.pt"
,
"*.safetensors"
,
"*.bin"
])
tokenizer_name
=
tokenizer_path
...
...
@@ -90,7 +91,7 @@ def get_tokenizer(
tokenizer_name
,
*
args
,
trust_remote_code
=
trust_remote_code
,
tokenizer_revision
=
tokenizer_
revision
,
revision
=
revision
,
**
kwargs
)
except
ValueError
as
e
:
# If the error pertains to the tokenizer class not existing or not
...
...
@@ -114,7 +115,7 @@ def get_tokenizer(
tokenizer_name
,
*
args
,
trust_remote_code
=
trust_remote_code
,
tokenizer_revision
=
tokenizer_
revision
,
revision
=
revision
,
**
kwargs
)
else
:
raise
e
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment