Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
83609791
Unverified
Commit
83609791
authored
Jan 20, 2025
by
Isotr0py
Committed by
GitHub
Jan 20, 2025
Browse files
[Model] Add Qwen2 PRM model support (#12202)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
0974c9bc
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
45 additions
and
13 deletions
+45
-13
docs/source/models/supported_models.md
docs/source/models/supported_models.md
+5
-0
tests/models/embedding/language/test_embedding.py
tests/models/embedding/language/test_embedding.py
+5
-4
tests/models/registry.py
tests/models/registry.py
+1
-0
vllm/model_executor/models/qwen2_rm.py
vllm/model_executor/models/qwen2_rm.py
+33
-9
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
No files found.
docs/source/models/supported_models.md
View file @
83609791
...
...
@@ -470,6 +470,11 @@ of the whole prompt are extracted from the normalized hidden state corresponding
- `Qwen/Qwen2.5-Math-RM-72B`, etc.
- ✅︎
- ✅︎
* - `Qwen2ForProcessRewardModel`
- Qwen2-based
- `Qwen/Qwen2.5-Math-PRM-7B`, `Qwen/Qwen2.5-Math-PRM-72B`, etc.
- ✅︎
- ✅︎
```
If your model is not in the above list, we will try to automatically convert the model using
...
...
tests/models/embedding/language/test_embedding.py
View file @
83609791
...
...
@@ -17,14 +17,15 @@ from ..utils import check_embeddings_close
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
"sentence-transformers/all-MiniLM-L12-v2"
),
pytest
.
param
(
"intfloat/multilingual-e5-large"
),
# [Encoder-decoder]
pytest
.
param
(
"intfloat/e5-mistral-7b-instruct"
,
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
# [Decoder-only]
pytest
.
param
(
"BAAI/bge-multilingual-gemma2"
,
marks
=
[
pytest
.
mark
.
core_model
]),
pytest
.
param
(
"ssmits/Qwen2-7B-Instruct-embed-base"
),
pytest
.
param
(
"intfloat/e5-mistral-7b-instruct"
,
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
]),
pytest
.
param
(
"Alibaba-NLP/gte-Qwen2-1.5B-instruct"
),
pytest
.
param
(
"Alibaba-NLP/gte-Qwen2-7B-instruct"
),
pytest
.
param
(
"ssmits/Qwen2-7B-Instruct-embed-base"
),
# [Encoder-decoder]
pytest
.
param
(
"sentence-transformers/stsb-roberta-base-v2"
),
],
)
...
...
tests/models/registry.py
View file @
83609791
...
...
@@ -155,6 +155,7 @@ _EMBEDDING_EXAMPLE_MODELS = {
"MistralModel"
:
_HfExamplesInfo
(
"intfloat/e5-mistral-7b-instruct"
),
"Qwen2Model"
:
_HfExamplesInfo
(
"ssmits/Qwen2-7B-Instruct-embed-base"
),
"Qwen2ForRewardModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Math-RM-72B"
),
"Qwen2ForProcessRewardModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Math-PRM-7B"
),
"Qwen2ForSequenceClassification"
:
_HfExamplesInfo
(
"jason9693/Qwen2.5-1.5B-apeach"
),
# noqa: E501
"RobertaModel"
:
_HfExamplesInfo
(
"sentence-transformers/stsb-roberta-base-v2"
),
# noqa: E501
"RobertaForMaskedLM"
:
_HfExamplesInfo
(
"sentence-transformers/all-roberta-large-v1"
),
# noqa: E501
...
...
vllm/model_executor/models/qwen2_rm.py
View file @
83609791
...
...
@@ -12,7 +12,7 @@ from vllm.attention import AttentionMetadata
from
vllm.config
import
VllmConfig
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.pooler
import
Pooler
,
PoolingType
from
vllm.model_executor.layers.pooler
import
Pooler
,
PoolingType
,
SimplePooler
from
vllm.model_executor.pooling_metadata
import
PoolingMetadata
from
vllm.sequence
import
IntermediateTensors
,
PoolerOutput
...
...
@@ -32,7 +32,7 @@ class ReLU(nn.Module):
return
self
.
activation
(
input
)
class
Qwen2
For
RewardModel
(
nn
.
Module
,
SupportsLoRA
,
SupportsPP
):
class
Qwen2Reward
Base
Model
(
nn
.
Module
,
SupportsLoRA
,
SupportsPP
):
packed_modules_mapping
=
{
"qkv_proj"
:
[
"q_proj"
,
...
...
@@ -60,7 +60,6 @@ class Qwen2ForRewardModel(nn.Module, SupportsLoRA, SupportsPP):
config
=
vllm_config
.
model_config
.
hf_config
quant_config
=
vllm_config
.
quant_config
lora_config
=
vllm_config
.
lora_config
pooler_config
=
vllm_config
.
model_config
.
pooler_config
self
.
config
=
config
self
.
lora_config
=
lora_config
...
...
@@ -74,14 +73,11 @@ class Qwen2ForRewardModel(nn.Module, SupportsLoRA, SupportsPP):
config
.
hidden_size
,
quant_config
=
quant_config
),
ReLU
(),
RowParallelLinear
(
config
.
hidden_size
,
1
,
RowParallelLinear
(
config
.
hidden_size
,
config
.
num_labels
,
quant_config
=
quant_config
),
)
self
.
_pooler
=
Pooler
.
from_config_with_defaults
(
pooler_config
,
pooling_type
=
PoolingType
.
ALL
,
normalize
=
False
,
softmax
=
False
)
self
.
_pooler
:
SimplePooler
self
.
make_empty_intermediate_tensors
=
(
self
.
model
.
make_empty_intermediate_tensors
)
...
...
@@ -115,3 +111,31 @@ class Qwen2ForRewardModel(nn.Module, SupportsLoRA, SupportsPP):
loader
=
AutoWeightsLoader
(
self
,
ignore_unexpected_prefixes
=
[
"lm_head."
])
return
loader
.
load_weights
(
weights
)
class
Qwen2ForRewardModel
(
Qwen2RewardBaseModel
):
def
__init__
(
self
,
*
,
vllm_config
,
prefix
=
""
):
vllm_config
.
model_config
.
hf_config
.
num_labels
=
1
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
pooler_config
=
vllm_config
.
model_config
.
pooler_config
self
.
_pooler
=
Pooler
.
from_config_with_defaults
(
pooler_config
,
pooling_type
=
PoolingType
.
ALL
,
normalize
=
False
,
softmax
=
False
)
class
Qwen2ForProcessRewardModel
(
Qwen2RewardBaseModel
):
def
__init__
(
self
,
*
,
vllm_config
,
prefix
=
""
):
vllm_config
.
model_config
.
hf_config
.
num_labels
=
2
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
pooler_config
=
vllm_config
.
model_config
.
pooler_config
self
.
_pooler
=
Pooler
.
from_config_with_defaults
(
pooler_config
,
pooling_type
=
PoolingType
.
STEP
,
normalize
=
False
,
softmax
=
True
,
step_tag_id
=
151651
,
)
vllm/model_executor/models/registry.py
View file @
83609791
...
...
@@ -127,6 +127,7 @@ _EMBEDDING_MODELS = {
"Qwen2Model"
:
(
"qwen2"
,
"Qwen2EmbeddingModel"
),
"Qwen2ForCausalLM"
:
(
"qwen2"
,
"Qwen2ForCausalLM"
),
"Qwen2ForRewardModel"
:
(
"qwen2_rm"
,
"Qwen2ForRewardModel"
),
"Qwen2ForProcessRewardModel"
:
(
"qwen2_rm"
,
"Qwen2ForProcessRewardModel"
),
"TeleChat2ForCausalLM"
:
(
"telechat2"
,
"TeleChat2ForCausalLM"
),
# [Multimodal]
"LlavaNextForConditionalGeneration"
:
(
"llava_next"
,
"LlavaNextForConditionalGeneration"
),
# noqa: E501
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment