Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dcbf4286
Unverified
Commit
dcbf4286
authored
Jun 11, 2024
by
sasha0552
Committed by
GitHub
Jun 11, 2024
Browse files
[Frontend] Customizable RoPE theta (#5197)
parent
00e6a2dc
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
8 deletions
+27
-8
tests/test_config.py
tests/test_config.py
+6
-1
vllm/config.py
vllm/config.py
+3
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+8
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+2
-1
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+8
-5
No files found.
tests/test_config.py
View file @
dcbf4286
...
@@ -63,8 +63,9 @@ def test_get_sliding_window():
...
@@ -63,8 +63,9 @@ def test_get_sliding_window():
assert
mistral_model_config
.
get_sliding_window
()
==
TEST_SLIDING_WINDOW
assert
mistral_model_config
.
get_sliding_window
()
==
TEST_SLIDING_WINDOW
def
test_rope_
scaling
():
def
test_rope_
customization
():
TEST_ROPE_SCALING
=
{
"type"
:
"dynamic"
,
"factor"
:
2.0
}
TEST_ROPE_SCALING
=
{
"type"
:
"dynamic"
,
"factor"
:
2.0
}
TEST_ROPE_THETA
=
16_000_000.0
LONGCHAT_ROPE_SCALING
=
{
"type"
:
"linear"
,
"factor"
:
8.0
}
LONGCHAT_ROPE_SCALING
=
{
"type"
:
"linear"
,
"factor"
:
8.0
}
llama_model_config
=
ModelConfig
(
llama_model_config
=
ModelConfig
(
...
@@ -76,6 +77,7 @@ def test_rope_scaling():
...
@@ -76,6 +77,7 @@ def test_rope_scaling():
seed
=
0
,
seed
=
0
,
)
)
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_scaling"
,
None
)
is
None
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_scaling"
,
None
)
is
None
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_theta"
,
None
)
==
500_000
assert
llama_model_config
.
max_model_len
==
8192
assert
llama_model_config
.
max_model_len
==
8192
llama_model_config
=
ModelConfig
(
llama_model_config
=
ModelConfig
(
...
@@ -86,9 +88,12 @@ def test_rope_scaling():
...
@@ -86,9 +88,12 @@ def test_rope_scaling():
dtype
=
"float16"
,
dtype
=
"float16"
,
seed
=
0
,
seed
=
0
,
rope_scaling
=
TEST_ROPE_SCALING
,
rope_scaling
=
TEST_ROPE_SCALING
,
rope_theta
=
TEST_ROPE_THETA
,
)
)
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_scaling"
,
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_scaling"
,
None
)
==
TEST_ROPE_SCALING
None
)
==
TEST_ROPE_SCALING
assert
getattr
(
llama_model_config
.
hf_config
,
"rope_theta"
,
None
)
==
TEST_ROPE_THETA
assert
llama_model_config
.
max_model_len
==
16384
assert
llama_model_config
.
max_model_len
==
16384
longchat_model_config
=
ModelConfig
(
longchat_model_config
=
ModelConfig
(
...
...
vllm/config.py
View file @
dcbf4286
...
@@ -93,6 +93,7 @@ class ModelConfig:
...
@@ -93,6 +93,7 @@ class ModelConfig:
revision
:
Optional
[
str
]
=
None
,
revision
:
Optional
[
str
]
=
None
,
code_revision
:
Optional
[
str
]
=
None
,
code_revision
:
Optional
[
str
]
=
None
,
rope_scaling
:
Optional
[
dict
]
=
None
,
rope_scaling
:
Optional
[
dict
]
=
None
,
rope_theta
:
Optional
[
float
]
=
None
,
tokenizer_revision
:
Optional
[
str
]
=
None
,
tokenizer_revision
:
Optional
[
str
]
=
None
,
max_model_len
:
Optional
[
int
]
=
None
,
max_model_len
:
Optional
[
int
]
=
None
,
quantization
:
Optional
[
str
]
=
None
,
quantization
:
Optional
[
str
]
=
None
,
...
@@ -113,6 +114,7 @@ class ModelConfig:
...
@@ -113,6 +114,7 @@ class ModelConfig:
self
.
revision
=
revision
self
.
revision
=
revision
self
.
code_revision
=
code_revision
self
.
code_revision
=
code_revision
self
.
rope_scaling
=
rope_scaling
self
.
rope_scaling
=
rope_scaling
self
.
rope_theta
=
rope_theta
# The tokenizer version is consistent with the model version by default.
# The tokenizer version is consistent with the model version by default.
if
tokenizer_revision
is
None
:
if
tokenizer_revision
is
None
:
self
.
tokenizer_revision
=
revision
self
.
tokenizer_revision
=
revision
...
@@ -132,7 +134,7 @@ class ModelConfig:
...
@@ -132,7 +134,7 @@ class ModelConfig:
self
.
skip_tokenizer_init
=
skip_tokenizer_init
self
.
skip_tokenizer_init
=
skip_tokenizer_init
self
.
hf_config
=
get_config
(
self
.
model
,
trust_remote_code
,
revision
,
self
.
hf_config
=
get_config
(
self
.
model
,
trust_remote_code
,
revision
,
code_revision
,
rope_scaling
)
code_revision
,
rope_scaling
,
rope_theta
)
self
.
hf_text_config
=
get_hf_text_config
(
self
.
hf_config
)
self
.
hf_text_config
=
get_hf_text_config
(
self
.
hf_config
)
self
.
dtype
=
_get_and_verify_dtype
(
self
.
hf_text_config
,
dtype
)
self
.
dtype
=
_get_and_verify_dtype
(
self
.
hf_text_config
,
dtype
)
self
.
max_model_len
=
_get_and_verify_max_len
(
self
.
max_model_len
=
_get_and_verify_max_len
(
...
...
vllm/engine/arg_utils.py
View file @
dcbf4286
...
@@ -53,6 +53,7 @@ class EngineArgs:
...
@@ -53,6 +53,7 @@ class EngineArgs:
revision
:
Optional
[
str
]
=
None
revision
:
Optional
[
str
]
=
None
code_revision
:
Optional
[
str
]
=
None
code_revision
:
Optional
[
str
]
=
None
rope_scaling
:
Optional
[
dict
]
=
None
rope_scaling
:
Optional
[
dict
]
=
None
rope_theta
:
Optional
[
float
]
=
None
tokenizer_revision
:
Optional
[
str
]
=
None
tokenizer_revision
:
Optional
[
str
]
=
None
quantization
:
Optional
[
str
]
=
None
quantization
:
Optional
[
str
]
=
None
enforce_eager
:
bool
=
False
enforce_eager
:
bool
=
False
...
@@ -400,6 +401,12 @@ class EngineArgs:
...
@@ -400,6 +401,12 @@ class EngineArgs:
type
=
json
.
loads
,
type
=
json
.
loads
,
help
=
'RoPE scaling configuration in JSON format. '
help
=
'RoPE scaling configuration in JSON format. '
'For example, {"type":"dynamic","factor":2.0}'
)
'For example, {"type":"dynamic","factor":2.0}'
)
parser
.
add_argument
(
'--rope-theta'
,
default
=
None
,
type
=
float
,
help
=
'RoPE theta. Use with `rope_scaling`. In '
'some cases, changing the RoPE theta improves the '
'performance of the scaled model.'
)
parser
.
add_argument
(
'--enforce-eager'
,
parser
.
add_argument
(
'--enforce-eager'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Always use eager-mode PyTorch. If False, '
help
=
'Always use eager-mode PyTorch. If False, '
...
@@ -630,6 +637,7 @@ class EngineArgs:
...
@@ -630,6 +637,7 @@ class EngineArgs:
revision
=
self
.
revision
,
revision
=
self
.
revision
,
code_revision
=
self
.
code_revision
,
code_revision
=
self
.
code_revision
,
rope_scaling
=
self
.
rope_scaling
,
rope_scaling
=
self
.
rope_scaling
,
rope_theta
=
self
.
rope_theta
,
tokenizer_revision
=
self
.
tokenizer_revision
,
tokenizer_revision
=
self
.
tokenizer_revision
,
max_model_len
=
self
.
max_model_len
,
max_model_len
=
self
.
max_model_len
,
quantization
=
self
.
quantization
,
quantization
=
self
.
quantization
,
...
...
vllm/engine/llm_engine.py
View file @
dcbf4286
...
@@ -162,7 +162,7 @@ class LLMEngine:
...
@@ -162,7 +162,7 @@ class LLMEngine:
"Initializing an LLM engine (v%s) with config: "
"Initializing an LLM engine (v%s) with config: "
"model=%r, speculative_config=%r, tokenizer=%r, "
"model=%r, speculative_config=%r, tokenizer=%r, "
"skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
"skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
"rope_scaling=%r, tokenizer_revision=%s, "
"rope_scaling=%r,
rope_theta=%r,
tokenizer_revision=%s, "
"trust_remote_code=%s, dtype=%s, max_seq_len=%d, "
"trust_remote_code=%s, dtype=%s, max_seq_len=%d, "
"download_dir=%r, load_format=%s, tensor_parallel_size=%d, "
"download_dir=%r, load_format=%s, tensor_parallel_size=%d, "
"disable_custom_all_reduce=%s, quantization=%s, "
"disable_custom_all_reduce=%s, quantization=%s, "
...
@@ -177,6 +177,7 @@ class LLMEngine:
...
@@ -177,6 +177,7 @@ class LLMEngine:
model_config
.
tokenizer_mode
,
model_config
.
tokenizer_mode
,
model_config
.
revision
,
model_config
.
revision
,
model_config
.
rope_scaling
,
model_config
.
rope_scaling
,
model_config
.
rope_theta
,
model_config
.
tokenizer_revision
,
model_config
.
tokenizer_revision
,
model_config
.
trust_remote_code
,
model_config
.
trust_remote_code
,
model_config
.
dtype
,
model_config
.
dtype
,
...
...
vllm/transformers_utils/config.py
View file @
dcbf4286
...
@@ -23,7 +23,8 @@ def get_config(model: str,
...
@@ -23,7 +23,8 @@ def get_config(model: str,
trust_remote_code
:
bool
,
trust_remote_code
:
bool
,
revision
:
Optional
[
str
]
=
None
,
revision
:
Optional
[
str
]
=
None
,
code_revision
:
Optional
[
str
]
=
None
,
code_revision
:
Optional
[
str
]
=
None
,
rope_scaling
:
Optional
[
dict
]
=
None
)
->
PretrainedConfig
:
rope_scaling
:
Optional
[
dict
]
=
None
,
rope_theta
:
Optional
[
float
]
=
None
)
->
PretrainedConfig
:
try
:
try
:
if
VLLM_USE_MODELSCOPE
:
if
VLLM_USE_MODELSCOPE
:
from
modelscope
import
AutoConfig
from
modelscope
import
AutoConfig
...
@@ -50,10 +51,12 @@ def get_config(model: str,
...
@@ -50,10 +51,12 @@ def get_config(model: str,
config
=
config_class
.
from_pretrained
(
model
,
config
=
config_class
.
from_pretrained
(
model
,
revision
=
revision
,
revision
=
revision
,
code_revision
=
code_revision
)
code_revision
=
code_revision
)
if
rope_scaling
is
not
None
:
for
key
,
value
in
[(
"rope_scaling"
,
rope_scaling
),
logger
.
info
(
"Updating rope_scaling from %r to %r"
,
(
"rope_theta"
,
rope_theta
)]:
getattr
(
config
,
"rope_scaling"
,
None
),
rope_scaling
)
if
value
is
not
None
:
config
.
update
({
"rope_scaling"
:
rope_scaling
})
logger
.
info
(
"Updating %s from %r to %r"
,
key
,
getattr
(
config
,
key
,
None
),
value
)
config
.
update
({
key
:
value
})
return
config
return
config
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment