Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
da971ec7
Unverified
Commit
da971ec7
authored
Jun 19, 2024
by
Michael Goin
Committed by
GitHub
Jun 19, 2024
Browse files
[Model] Add FP8 kv cache for Qwen2 (#5656)
parent
3eea7488
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
0 deletions
+14
-0
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+14
-0
No files found.
vllm/model_executor/models/qwen2.py
View file @
da971ec7
...
@@ -46,6 +46,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
...
@@ -46,6 +46,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.sequence
import
SamplerOutput
from
vllm.sequence
import
SamplerOutput
from
vllm.utils
import
print_warning_once
class
Qwen2MLP
(
nn
.
Module
):
class
Qwen2MLP
(
nn
.
Module
):
...
@@ -375,6 +376,19 @@ class Qwen2ForCausalLM(nn.Module):
...
@@ -375,6 +376,19 @@ class Qwen2ForCausalLM(nn.Module):
# Skip loading extra bias for GPTQ models.
# Skip loading extra bias for GPTQ models.
if
name
.
endswith
(
".bias"
)
and
name
not
in
params_dict
:
if
name
.
endswith
(
".bias"
)
and
name
not
in
params_dict
:
continue
continue
# Remapping the name of FP8 kv-scale.
if
name
.
endswith
(
"kv_scale"
):
remapped_kv_scale_name
=
name
.
replace
(
".kv_scale"
,
".attn.kv_scale"
)
if
remapped_kv_scale_name
not
in
params_dict
:
print_warning_once
(
f
"Found kv scale in the checkpoint (e.g.
{
name
}
), "
"but not found the expected name in the model "
f
"(e.g.
{
remapped_kv_scale_name
}
). kv-scale is "
"not loaded."
)
continue
else
:
name
=
remapped_kv_scale_name
param
=
params_dict
[
name
]
param
=
params_dict
[
name
]
weight_loader
=
getattr
(
param
,
"weight_loader"
,
weight_loader
=
getattr
(
param
,
"weight_loader"
,
default_weight_loader
)
default_weight_loader
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment