Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ce69f7f7
Unverified
Commit
ce69f7f7
Authored Jan 27, 2025 by Isotr0py
Committed by GitHub on Jan 27, 2025
Browse files
[Bugfix] Fix gpt2 GGUF inference (#12467)
Signed-off-by: Isotr0py <2037008807@qq.com>
parent
624a1e47
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 8 additions and 11 deletions
+8
-11
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+8
-11
No files found.
vllm/model_executor/models/gpt2.py
View file @
ce69f7f7
...
@@ -258,13 +258,13 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
...
@@ -258,13 +258,13 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
self
.
transformer
=
GPT2Model
(
vllm_config
=
vllm_config
,
self
.
transformer
=
GPT2Model
(
vllm_config
=
vllm_config
,
prefix
=
maybe_prefix
(
prefix
=
maybe_prefix
(
prefix
,
"transformer"
))
prefix
,
"transformer"
))
self
.
lm_head
=
ParallelLMHead
(
self
.
config
.
vocab_size
,
self
.
config
.
hidden_size
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.lm_head"
)
if
self
.
config
.
tie_word_embeddings
:
if
self
.
config
.
tie_word_embeddings
:
self
.
lm_head
=
self
.
transformer
.
wte
self
.
lm_head
=
self
.
lm_head
.
tie_weights
(
self
.
transformer
.
wte
)
else
:
self
.
lm_head
=
ParallelLMHead
(
self
.
config
.
vocab_size
,
self
.
config
.
hidden_size
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.lm_head"
)
self
.
logits_processor
=
LogitsProcessor
(
config
.
vocab_size
)
self
.
logits_processor
=
LogitsProcessor
(
config
.
vocab_size
)
self
.
sampler
=
get_sampler
()
self
.
sampler
=
get_sampler
()
self
.
make_empty_intermediate_tensors
=
(
self
.
make_empty_intermediate_tensors
=
(
...
@@ -309,15 +309,12 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
...
@@ -309,15 +309,12 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
params_dict
=
dict
(
self
.
named_parameters
(
remove_duplicate
=
False
))
params_dict
=
dict
(
self
.
named_parameters
(
remove_duplicate
=
False
))
loaded_params
:
Set
[
str
]
=
set
()
loaded_params
:
Set
[
str
]
=
set
()
for
name
,
loaded_weight
in
weights
:
for
name
,
loaded_weight
in
weights
:
if
name
.
startswith
(
"lm_head"
):
# GPT-2 ties the weights of the embedding layer and the final
# linear layer.
continue
if
".attn.bias"
in
name
or
".attn.masked_bias"
in
name
:
if
".attn.bias"
in
name
or
".attn.masked_bias"
in
name
:
# Skip attention mask.
# Skip attention mask.
# NOTE: "c_attn.bias" should not be skipped.
# NOTE: "c_attn.bias" should not be skipped.
continue
continue
if
not
name
.
startswith
(
"transformer."
):
if
not
name
.
startswith
(
"transformer."
)
and
not
name
.
startswith
(
"lm_head"
):
name
=
"transformer."
+
name
name
=
"transformer."
+
name
if
is_pp_missing_parameter
(
name
,
self
):
if
is_pp_missing_parameter
(
name
,
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.