Commit 66d18a7f (unverified)
Authored Oct 02, 2023 by Federico Cassano; committed by GitHub on Oct 02, 2023
Parent: ba0bfd40

add support for tokenizer revision (#1163)

Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
Showing 5 changed files with 23 additions and 1 deletion:

  vllm/config.py                        +5 −0
  vllm/engine/arg_utils.py              +10 −1
  vllm/engine/llm_engine.py             +2 −0
  vllm/entrypoints/llm.py               +4 −0
  vllm/transformers_utils/tokenizer.py  +2 −0
vllm/config.py
@@ -41,6 +41,9 @@ class ModelConfig:
         revision: The specific model version to use. It can be a branch name,
             a tag name, or a commit id. If unspecified, will use the default
             version.
+        tokenizer_revision: The specific tokenizer version to use. It can be a
+            branch name, a tag name, or a commit id. If unspecified, will use
+            the default version.
         max_model_len: Maximum length of a sequence (including prompt and
             output). If None, will be derived from the model.
         quantization: Quantization method that was used to quantize the model
@@ -58,6 +61,7 @@ class ModelConfig:
         dtype: str,
         seed: int,
         revision: Optional[str] = None,
+        tokenizer_revision: Optional[str] = None,
         max_model_len: Optional[int] = None,
         quantization: Optional[str] = None,
     ) -> None:
@@ -69,6 +73,7 @@ class ModelConfig:
         self.load_format = load_format
         self.seed = seed
         self.revision = revision
+        self.tokenizer_revision = tokenizer_revision
         self.quantization = quantization
         self.hf_config = get_config(model, trust_remote_code, revision)
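Editor's note: the new `tokenizer_revision` field mirrors the existing `revision` field, so any git ref in the Hugging Face repo is accepted. For context (not part of this diff), this is the same convention as the `revision` argument that Hugging Face's `from_pretrained` already takes; a minimal sketch, with "gpt2" and "main" as stand-in values:

from transformers import AutoTokenizer

# "main" is a branch name; a tag name or a (short or full) commit SHA works
# the same way. The repo id "gpt2" is just a stand-in example.
tokenizer = AutoTokenizer.from_pretrained("gpt2", revision="main")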
vllm/engine/arg_utils.py
@@ -29,6 +29,7 @@ class EngineArgs:
     max_num_seqs: int = 256
     disable_log_stats: bool = False
     revision: Optional[str] = None
+    tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None

     def __post_init__(self):
@@ -57,6 +58,13 @@ class EngineArgs:
             help='the specific model version to use. It can be a branch '
             'name, a tag name, or a commit id. If unspecified, will use '
             'the default version.')
+        parser.add_argument(
+            '--tokenizer-revision',
+            type=str,
+            default=None,
+            help='the specific tokenizer version to use. It can be a branch '
+            'name, a tag name, or a commit id. If unspecified, will use '
+            'the default version.')
         parser.add_argument('--tokenizer-mode',
                             type=str,
                             default=EngineArgs.tokenizer_mode,
@@ -175,7 +183,8 @@ class EngineArgs:
                                    self.tokenizer_mode, self.trust_remote_code,
                                    self.download_dir, self.load_format,
                                    self.dtype, self.seed, self.revision,
-                                   self.max_model_len, self.quantization)
+                                   self.tokenizer_revision, self.max_model_len,
+                                   self.quantization)
         cache_config = CacheConfig(self.block_size,
                                    self.gpu_memory_utilization, self.swap_space,
                                    getattr(model_config.hf_config, 'sliding_window', None))
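Editor's note: with this change, any entrypoint built on `EngineArgs` gains a `--tokenizer-revision` flag alongside the existing `--revision`. A hedged sketch of the programmatic equivalent, assuming the usual dataclass-style construction and stand-in values for the model and refs:

from vllm.engine.arg_utils import EngineArgs

# Equivalent of passing `--revision main --tokenizer-revision main` on the
# command line; "gpt2" and "main" are stand-in values.
engine_args = EngineArgs(
    model="gpt2",
    revision="main",
    tokenizer_revision="main",
)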
vllm/engine/llm_engine.py
@@ -75,6 +75,7 @@ class LLMEngine:
             f"tokenizer={model_config.tokenizer!r}, "
             f"tokenizer_mode={model_config.tokenizer_mode}, "
             f"revision={model_config.revision}, "
+            f"tokenizer_revision={model_config.tokenizer_revision}, "
             f"trust_remote_code={model_config.trust_remote_code}, "
             f"dtype={model_config.dtype}, "
             f"max_seq_len={model_config.max_model_len}, "
@@ -98,6 +99,7 @@ class LLMEngine:
             model_config.tokenizer,
             tokenizer_mode=model_config.tokenizer_mode,
             trust_remote_code=model_config.trust_remote_code,
+            tokenizer_revision=model_config.tokenizer_revision,
             revision=model_config.revision)
         self.seq_counter = Counter()
vllm/entrypoints/llm.py
@@ -42,6 +42,8 @@ class LLM:
             quantized and use `dtype` to determine the data type of the weights.
         revision: The specific model version to use. It can be a branch name,
             a tag name, or a commit id.
+        tokenizer_revision: The specific tokenizer version to use. It can be a
+            branch name, a tag name, or a commit id.
         seed: The seed to initialize the random number generator for sampling.
         gpu_memory_utilization: The ratio (between 0 and 1) of GPU memory to
             reserve for the model weights, activations, and KV cache. Higher
@@ -65,6 +67,7 @@ class LLM:
         dtype: str = "auto",
         quantization: Optional[str] = None,
         revision: Optional[str] = None,
+        tokenizer_revision: Optional[str] = None,
         seed: int = 0,
         gpu_memory_utilization: float = 0.9,
         swap_space: int = 4,
@@ -81,6 +84,7 @@ class LLM:
             dtype=dtype,
             quantization=quantization,
             revision=revision,
+            tokenizer_revision=tokenizer_revision,
             seed=seed,
             gpu_memory_utilization=gpu_memory_utilization,
             swap_space=swap_space,
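Editor's note: for offline use, the new argument threads straight through the `LLM` constructor, letting the model weights and the tokenizer be pinned to independent revisions. A minimal sketch, assuming a GPU environment and using "gpt2"/"main" as stand-in values:

from vllm import LLM, SamplingParams

# Pin the model weights and the tokenizer to independent git refs.
llm = LLM(
    model="gpt2",
    revision="main",
    tokenizer_revision="main",
)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)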
vllm/transformers_utils/tokenizer.py
@@ -16,6 +16,7 @@ def get_tokenizer(
     *args,
     tokenizer_mode: str = "auto",
     trust_remote_code: bool = False,
+    tokenizer_revision: Optional[str] = None,
     **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     """Gets a tokenizer for the given model name via Huggingface."""
@@ -37,6 +38,7 @@ def get_tokenizer(
             tokenizer_name,
             *args,
             trust_remote_code=trust_remote_code,
+            tokenizer_revision=tokenizer_revision,
             **kwargs)
     except TypeError as e:
         # The LLaMA tokenizer causes a protobuf error in some environments.
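Editor's note: per the signature above, callers of `get_tokenizer` can now pass the revision as a keyword, and the diff forwards it to the underlying Hugging Face loader. A small sketch with stand-in values:

from vllm.transformers_utils.tokenizer import get_tokenizer

# "gpt2" and "main" are stand-ins; the keyword is passed through to the
# Hugging Face from_pretrained call.
tokenizer = get_tokenizer("gpt2", tokenizer_revision="main")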