Commit 66d18a7f (unverified)
Authored Oct 02, 2023 by Federico Cassano; committed by GitHub on Oct 02, 2023

add support for tokenizer revision (#1163)

Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>

Parent: ba0bfd40
Showing 5 changed files with 23 additions and 1 deletion.
vllm/config.py                        +5  -0
vllm/engine/arg_utils.py              +10 -1
vllm/engine/llm_engine.py             +2  -0
vllm/entrypoints/llm.py               +4  -0
vllm/transformers_utils/tokenizer.py  +2  -0
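Taken together, the changes thread a single new option, tokenizer_revision, through every layer of the stack: the ModelConfig constructor, the EngineArgs CLI surface, the LLMEngine constructor and its startup log, the user-facing LLM entrypoint, and finally the get_tokenizer helper that forwards it to Hugging Face.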
vllm/config.py

@@ -41,6 +41,9 @@ class ModelConfig:
         revision: The specific model version to use. It can be a branch name,
             a tag name, or a commit id. If unspecified, will use the default
             version.
+        tokenizer_revision: The specific tokenizer version to use. It can be a
+            branch name, a tag name, or a commit id. If unspecified, will use
+            the default version.
         max_model_len: Maximum length of a sequence (including prompt and
             output). If None, will be derived from the model.
         quantization: Quantization method that was used to quantize the model
@@ -58,6 +61,7 @@ class ModelConfig:
         dtype: str,
         seed: int,
         revision: Optional[str] = None,
+        tokenizer_revision: Optional[str] = None,
         max_model_len: Optional[int] = None,
         quantization: Optional[str] = None,
     ) -> None:
@@ -69,6 +73,7 @@ class ModelConfig:
         self.load_format = load_format
         self.seed = seed
         self.revision = revision
+        self.tokenizer_revision = tokenizer_revision
         self.quantization = quantization

         self.hf_config = get_config(model, trust_remote_code, revision)
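A minimal sketch of how the new field sits in ModelConfig, based on the constructor signature visible in this diff and in the EngineArgs call below; the model name and revision values are illustrative placeholders, not anything prescribed by the commit:

```python
from vllm.config import ModelConfig

# Keyword names follow the constructor shown in the diff above;
# the concrete values are placeholders.
model_config = ModelConfig(
    model="facebook/opt-125m",
    tokenizer="facebook/opt-125m",
    tokenizer_mode="auto",
    trust_remote_code=False,
    download_dir=None,
    load_format="auto",
    dtype="auto",
    seed=0,
    revision=None,               # model weights: repo default branch
    tokenizer_revision="main",   # tokenizer files: pinned independently
)
```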
vllm/engine/arg_utils.py

@@ -29,6 +29,7 @@ class EngineArgs:
     max_num_seqs: int = 256
     disable_log_stats: bool = False
     revision: Optional[str] = None
+    tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None

     def __post_init__(self):
@@ -57,6 +58,13 @@ class EngineArgs:
                             help='the specific model version to use. It can be a branch '
                             'name, a tag name, or a commit id. If unspecified, will use '
                             'the default version.')
+        parser.add_argument(
+            '--tokenizer-revision',
+            type=str,
+            default=None,
+            help='the specific tokenizer version to use. It can be a branch '
+            'name, a tag name, or a commit id. If unspecified, will use '
+            'the default version.')
         parser.add_argument('--tokenizer-mode',
                             type=str,
                             default=EngineArgs.tokenizer_mode,
@@ -175,7 +183,8 @@ class EngineArgs:
             self.tokenizer_mode, self.trust_remote_code,
             self.download_dir, self.load_format,
             self.dtype, self.seed, self.revision,
-            self.max_model_len, self.quantization)
+            self.tokenizer_revision, self.max_model_len,
+            self.quantization)
         cache_config = CacheConfig(
             self.block_size, self.gpu_memory_utilization, self.swap_space,
             getattr(model_config.hf_config, 'sliding_window', None))
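The new flag becomes available wherever EngineArgs builds its argument parser (e.g. the API server). A hedged sketch of exercising it programmatically, assuming the add_cli_args/from_cli_args helpers vLLM exposed at the time; model name and revision value are placeholders:

```python
import argparse

from vllm.engine.arg_utils import EngineArgs

# Build the engine's CLI parser and parse the new flag. The flag name
# comes from the diff above; the values are placeholders.
parser = EngineArgs.add_cli_args(argparse.ArgumentParser())
args = parser.parse_args([
    "--model", "facebook/opt-125m",
    "--tokenizer-revision", "main",
])
engine_args = EngineArgs.from_cli_args(args)
print(engine_args.tokenizer_revision)  # -> "main"
```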
vllm/engine/llm_engine.py

@@ -75,6 +75,7 @@ class LLMEngine:
             f"tokenizer={model_config.tokenizer!r}, "
             f"tokenizer_mode={model_config.tokenizer_mode}, "
             f"revision={model_config.revision}, "
+            f"tokenizer_revision={model_config.tokenizer_revision}, "
             f"trust_remote_code={model_config.trust_remote_code}, "
             f"dtype={model_config.dtype}, "
             f"max_seq_len={model_config.max_model_len}, "
@@ -98,6 +99,7 @@ class LLMEngine:
             model_config.tokenizer,
             tokenizer_mode=model_config.tokenizer_mode,
             trust_remote_code=model_config.trust_remote_code,
+            tokenizer_revision=model_config.tokenizer_revision,
             revision=model_config.revision)

        self.seq_counter = Counter()
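With the extra f-string in place, the engine's startup banner reports the tokenizer revision alongside the model revision. An illustrative rendering assembled from the format strings in the hunk above; every value shown is a placeholder, not captured output:

```
INFO llm_engine.py: Initializing an LLM engine with config: model='facebook/opt-125m', tokenizer='facebook/opt-125m', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, ...
```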
vllm/entrypoints/llm.py

@@ -42,6 +42,8 @@ class LLM:
             quantized and use `dtype` to determine the data type of the weights.
         revision: The specific model version to use. It can be a branch name,
             a tag name, or a commit id.
+        tokenizer_revision: The specific tokenizer version to use. It can be a
+            branch name, a tag name, or a commit id.
         seed: The seed to initialize the random number generator for sampling.
         gpu_memory_utilization: The ratio (between 0 and 1) of GPU memory to
             reserve for the model weights, activations, and KV cache. Higher
@@ -65,6 +67,7 @@ class LLM:
         dtype: str = "auto",
         quantization: Optional[str] = None,
         revision: Optional[str] = None,
+        tokenizer_revision: Optional[str] = None,
         seed: int = 0,
         gpu_memory_utilization: float = 0.9,
         swap_space: int = 4,
@@ -81,6 +84,7 @@ class LLM:
             dtype=dtype,
             quantization=quantization,
             revision=revision,
+            tokenizer_revision=tokenizer_revision,
             seed=seed,
             gpu_memory_utilization=gpu_memory_utilization,
             swap_space=swap_space,
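For end users this is the visible surface of the commit: the offline LLM entrypoint accepts tokenizer_revision directly. A minimal sketch; the model name and revision values are placeholders (the typical use case is pinning the weights to one revision while taking the tokenizer files from another, e.g. after an upstream tokenizer fix):

```python
from vllm import LLM, SamplingParams

# Pin the weights and the tokenizer to independently chosen revisions.
llm = LLM(
    model="facebook/opt-125m",    # placeholder model
    revision="main",              # revision for the model weights
    tokenizer_revision="main",    # revision for the tokenizer (new here)
)

outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```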
vllm/transformers_utils/tokenizer.py

@@ -16,6 +16,7 @@ def get_tokenizer(
     *args,
     tokenizer_mode: str = "auto",
     trust_remote_code: bool = False,
+    tokenizer_revision: Optional[str] = None,
     **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     """Gets a tokenizer for the given model name via Huggingface."""
@@ -37,6 +38,7 @@ def get_tokenizer(
             tokenizer_name,
             *args,
             trust_remote_code=trust_remote_code,
+            tokenizer_revision=tokenizer_revision,
             **kwargs)
     except TypeError as e:
         # The LLaMA tokenizer causes a protobuf error in some environments.
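The keyword ultimately lands in AutoTokenizer.from_pretrained through this helper. A hedged sketch of calling it directly, mirroring the signature in the diff; the tokenizer name is a placeholder:

```python
from vllm.transformers_utils.tokenizer import get_tokenizer

# tokenizer_revision=None (the default) keeps the old behaviour of using
# the repository's default branch; a branch, tag, or commit id pins it.
tokenizer = get_tokenizer(
    "facebook/opt-125m",
    tokenizer_mode="auto",
    trust_remote_code=False,
    tokenizer_revision=None,
)
print(type(tokenizer).__name__)
```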