Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fda84b29
Commit
fda84b29
authored
Nov 19, 2025
by
guanyu1
Browse files
test2-1119
parent
f2db436a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
129 additions
and
52 deletions
+129
-52
benchmarks/benchmark_hunyuan.py
benchmarks/benchmark_hunyuan.py
+7
-4
benchmarks/benchmark_qwen3.py
benchmarks/benchmark_qwen3.py
+56
-0
vllm/model_executor/layers/pooler.py
vllm/model_executor/layers/pooler.py
+2
-3
vllm/model_executor/models/adapters_custom/adapters_classify.py
...odel_executor/models/adapters_custom/adapters_classify.py
+64
-45
No files found.
benchmarks/benchmark_hunyuan.py
View file @
fda84b29
...
...
@@ -63,16 +63,19 @@ DEFAULT_PROMPT_TOKEN_IDS = [
15
,
20
,
9080
,
320
,
101396
,
37271
,
5232
,
21
,
20
,
23
,
36827
,
696
,
2929
,
5232
,
82
,
8910
,
6704
,
25451
,
43032
,
127962
,
127960
,
127967
,
]
]
,[
1
,
2
,
3
,
4
,
5
,
6
],[
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
],[
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
36
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
44
,
45
,
46
,
47
,
48
,
49
,
50
,
51
,
52
,
53
,
54
,
55
,
56
,
57
,
58
,
59
,
60
,
61
,
62
,
63
,
64
,
65
,
66
,
67
,
68
,
69
,
70
,
71
,
72
,
73
,
74
,
75
,
76
,
77
,
78
,
79
,
80
,
81
,
82
,
83
,
84
,
85
,
86
,
87
,
88
,
89
,
90
,
91
,
92
,
93
,
94
,
95
,
96
,
97
,
98
,
99
,
100
]
PROMPTS
=
"你是一个搜索排序专家,请你仔细阅读以下Doc和Query,给出文章满意度评分及具体原因。请注意,本次搜索时间是2024年12月22日。
\n\n
Query:sdl 概述
\n
- Query领域:游戏
\n
- 时效需求:无时效
\n
- 权威需求:中权威
\n\n
Doc内容:
\n
- 标题:【SDL实践指南】SDL基本介绍
\n
- 正文:文章前言传统的软件开发生命周期关注核心点在于需求分析、需求设计和需求编码实现,但是事实证明只注重软件业务功能设计实现而缺乏对产品安全考量的产品终将会为此而付出惨痛的代价,例如:因为缺乏对安全相关法律法规的了解,在产品业务功能实现中过度采集用户个人数据信息并将其进行滥用和不安全的存储而带来的法律追责问题和应用产品强制下架整改;因为产品发版后被攻击者恶意攻击造成系统无法正常使用而被迫强制下线进行漏洞修复等,为了规避产品上线后由于潜在的安全问题而带来的经济损失以及时间成本等问题SDL应运而生。SDL简介SDL(Security Development Lifecycle,安全开发生命周期)是由微软提出的一种从安全角度指导软件开发的管理模式,它主要通过在传统的软件开发生命周期的各个阶段穿插一系列的安全活动来保障和提升产品自身的安全能力SDL侧重于软件开发过程中的安全保证过程,旨在开发出安全的软件应
\n\n
Doc作者:
\n
- 作者名称:七芒星实验室
\n
- 作者领域:科技_信息技术
\n
- 作者认证:个人
\n\n
Doc来源:腾讯网
\n
- 平台领域:综合站点
\n
- 备案类型:企业
\n
- 平台等级:10
\n
- 权威等级:1
\n\n
Doc时间:2023年03月05日 (距今:658天)
\n\n
Query:sdl 概述"
if
__name__
==
"__main__"
:
llm
=
LLM
(
model
=
"/tools/gy_model/test_
1
"
,
task
=
"classify"
,
trust_remote_code
=
True
,
enforce_eager
=
True
)
llm
=
LLM
(
model
=
"/tools/gy_model/test_
2
"
,
task
=
"classify"
,
trust_remote_code
=
True
,
enforce_eager
=
True
)
# token_id
#(output,) = llm.classify([token_inputs(ids) for ids in DEFAULT_PROMPT_TOKEN_IDS])
(
output
,)
=
llm
.
classify
(
token_inputs
(
DEFAULT_PROMPT_TOKEN_IDS
))
outputs
=
llm
.
classify
([
token_inputs
(
ids
)
for
ids
in
DEFAULT_PROMPT_TOKEN_IDS
])
for
i
,
out
in
enumerate
(
outputs
):
probs
=
out
.
outputs
.
probs
print
(
f
"Request
{
i
}
, class probs =
{
probs
}
"
)
#(output,) = llm.classify(token_inputs(DEFAULT_PROMPT_TOKEN_IDS))
#prompts
# (output,) = llm.classify(PROMPTS)
# probs = output.outputs.probs
...
...
benchmarks/benchmark_qwen3.py
0 → 100644
View file @
fda84b29
#!/usr/bin/env python3
"""Minimal Qwen3 classify demo.
This mirrors the docs example:
llm = LLM(model="Qwen/Qwen3-Reranker-0.6B", runner="pooling")
outputs = llm.classify(["prompt 1", "prompt 2"]) # batched
You can run with raw text prompts (default) or with token IDs.
"""
import
os
from
vllm
import
LLM
from
vllm.inputs
import
token_inputs
# Two sample prompts, sent to the model together as one batch of 2.
# Both are Chinese-language instructions that end with the query "sdl 概述":
#   1. asks the model, as a search-ranking expert, to read the Doc and Query
#      and give a satisfaction score with reasons;
#   2. asks whether the document satisfies the query intent, with reasons.
# The strings are runtime inputs to the model and must not be edited.
PROMPTS
=
[
"你是一个搜索排序专家,请你仔细阅读以下Doc和Query,给出文章满意度评分及具体原因。Query:sdl 概述"
,
"请根据以下文档与查询,判断文档是否满足查询意图,并给出理由。Query:sdl 概述"
,
]
def run_with_prompts(llm: LLM):
    """Classify the module-level ``PROMPTS`` as raw text and print the results.

    All prompts are handed to ``llm.classify`` in one call. For every item
    returned, one line is printed with its index, its ``outputs.probs`` value
    and the number of entries in that value.

    Args:
        llm: An already-constructed ``LLM`` used to run the classification.
    """
    results = llm.classify(PROMPTS)
    for idx, result in enumerate(results):
        class_probs = result.outputs.probs
        num_classes = len(class_probs)
        print(
            f"[text] Prompt {idx} -> probs: {class_probs} "
            f"(num_classes={num_classes})"
        )
def run_with_token_ids(llm: LLM):
    """Classify ``PROMPTS`` from token IDs instead of text and print the results.

    Each prompt is encoded with the tokenizer returned by
    ``llm.get_tokenizer()``, wrapped with ``token_inputs`` and submitted to
    ``llm.classify`` as a single batch. One line is printed per returned item
    with its index, its ``outputs.probs`` value and the number of entries.

    Args:
        llm: An already-constructed ``LLM`` used to tokenize and classify.
    """
    # Encode with the loaded model's own tokenizer so the IDs belong to its
    # vocabulary rather than being hard-coded.
    tokenizer = llm.get_tokenizer()
    requests = [token_inputs(tokenizer.encode(text)) for text in PROMPTS]

    results = llm.classify(requests)
    for idx, result in enumerate(results):
        class_probs = result.outputs.probs
        num_classes = len(class_probs)
        print(
            f"[tokens] Prompt {idx} -> probs: {class_probs} "
            f"(num_classes={num_classes})"
        )
if __name__ == "__main__":
    # The model defaults to Qwen/Qwen3-Reranker-0.6B; set the QWEN3_MODEL
    # environment variable to use a different model name or path.
    model = os.getenv("QWEN3_MODEL", "Qwen/Qwen3-Reranker-0.6B")

    llm = LLM(
        model=model,
        # Original author's note: ``task`` is deprecated but still supported
        # and maps to pooling+convert.
        task="classify",
        trust_remote_code=True,
        enforce_eager=True,
    )

    # Option A (recommended): classify the raw text prompts.
    run_with_prompts(llm)

    # Option B: classify from token IDs (uncomment to try).
    # run_with_token_ids(llm)
vllm/model_executor/layers/pooler.py
View file @
fda84b29
...
...
@@ -645,10 +645,9 @@ class ClassifierPooler(Pooler):
# pooled_data shape: [batchsize, hidden_size]
pooled_data
=
pooled_data
.
to
(
self
.
head_dtype
)
if
self
.
classifier
is
not
None
:
pooled_data
=
self
.
classifier
(
pooled_data
)
# pooled_data shape: [batchsize, num_labels]
pooled_data
=
self
.
classifier
(
pooled_data
,
pooling_metadata
)
if
self
.
logit_bias
is
not
None
:
pooled_data
-=
self
.
logit_bias
...
...
vllm/model_executor/models/adapters_custom/adapters_classify.py
View file @
fda84b29
...
...
@@ -9,7 +9,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
import
torch
import
torch.nn
as
nn
from
vllm.v1.pool.metadata
import
PoolingCursor
,
PoolingMetadata
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
get_act_fn
...
...
@@ -350,16 +350,31 @@ def new_hy_05b_dense_official_classification(cls: _T) -> _T:
})
def
_classifier
(
self
,
x
:
torch
.
Tensor
):
x
=
self
.
pool_head
(
x
)
if
isinstance
(
x
,
tuple
):
x
=
x
[
0
]
x
=
torch
.
tanh
(
x
)
x
=
self
.
pool_head2
(
x
)
if
isinstance
(
x
,
tuple
):
x
=
x
[
0
]
return
x
def
_classifier
(
self
,
x
:
torch
.
Tensor
,
pooling_metadata
:
PoolingMetadata
=
None
):
# Apply classification head to obtain per-class logits
pooled_output
=
self
.
pool_head
(
x
)
if
isinstance
(
pooled_output
,
tuple
):
pooled_output
=
pooled_output
[
0
]
pooled_output
=
torch
.
tanh
(
pooled_output
)
pooled_output
=
self
.
pool_head2
(
pooled_output
)
if
isinstance
(
pooled_output
,
tuple
):
pooled_output
=
pooled_output
[
0
]
# Select logits at the last non-pad token position per sequence
# seq_length: [batch]
# cursor = pooling_metadata.pooling_cursor
# lengths = cursor.num_scheduled_tokens_cpu.tolist() # 每个 request 本步实际调度的 token 数
# chunks = torch.split(self.input_ids, lengths)
# reward=[]
# for chunk in chunks:
# seq_length=(chunk != self.pad_id.long())
# reward.append(pooled_output)
# seq_length = (self.input_ids != self.pad_id).long().sum(dim=1) - 1
# batch_size = self.input_ids.size(0)
# reward = pooled_output[torch.arange(batch_size, device=pooled_output.device),
# seq_length].squeeze(-1)
return
pooled_output
def
forward
(
self
,
input_ids
:
torch
.
Tensor
,
...
...
@@ -522,51 +537,56 @@ def hy_2b_dense_classification_official_hf_multihead_full_mask(cls: _T) -> _T:
hidden
=
torch
.
relu
(
hidden
)
# hidden = torch.softmax(hidden, dim=1)
return
hidden
def
_classifier
(
self
,
x
:
torch
.
Tensor
):
def
_classifier
(
self
,
x
:
torch
.
Tensor
,
pooling_metadata
:
PoolingMetadata
=
None
):
pooled_output
=
self
.
pool_head
(
x
)
if
isinstance
(
pooled_output
,
tuple
):
pooled_output
=
pooled_output
[
0
]
pooled_output
=
torch
.
tanh
(
pooled_output
)
pooled_output_sat
=
self
.
pool_head2
(
pooled_output
).
contiguous
()
# bs * class_num
pooled_output_rel
=
self
.
pool_head2
(
pooled_output
).
contiguous
()
# bs * class_num
pooled_output_time
=
self
.
pool_head2
(
pooled_output
).
contiguous
()
# bs * class_num
pooled_output_auth
=
self
.
pool_head2
(
pooled_output
).
contiguous
()
# bs * class_num
pooled_output_sat
=
self
.
pool_head2
(
pooled_output
)
# bs * class_num
pooled_output_rel
=
self
.
pool_head2
(
pooled_output
)
# bs * class_num
pooled_output_time
=
self
.
pool_head2
(
pooled_output
)
# bs * class_num
pooled_output_auth
=
self
.
pool_head2
(
pooled_output
)
# bs * class_num
qfeat
=
torch
.
tensor
([[
2
,
0
,
20
]],
device
=
pooled_output
.
device
)
qfeat
=
qfeat
.
to
(
dtype
=
pooled_output
.
dtype
)
qhidden
=
self
.
encode_qfeat
(
qfeat
)
a_wei
=
self
.
qfeat_fc2
(
qhidden
)
a_bias
=
self
.
qfeat_fc3
(
qhidden
)
if
self
.
config
.
pool_type
==
"mean"
:
reward
=
pooled_output
.
mean
(
dim
=
1
).
squeeze
(
-
1
)
elif
self
.
config
.
pool_type
==
"last"
:
# bs * hidden_size
seq_length
=
(
self
.
input_ids
!=
self
.
pad_id
).
long
().
sum
(
dim
=
1
)
-
1
batch_size
=
self
.
input_ids
.
size
(
0
)
sat_logits
=
pooled_output_sat
[
torch
.
arange
(
batch_size
,
device
=
pooled_output
.
device
),
seq_length
-
1
]
auth_logits
=
pooled_output_auth
[
torch
.
arange
(
batch_size
,
device
=
pooled_output
.
device
),
seq_length
-
2
]
time_logits
=
pooled_output_time
[
torch
.
arange
(
batch_size
,
device
=
pooled_output
.
device
),
seq_length
-
3
]
rel_logits
=
pooled_output_rel
[
torch
.
arange
(
batch_size
,
device
=
pooled_output
.
device
),
seq_length
-
4
]
# a_score = torch.sigmoid(torch.concat([rel_logits, time_logits, auth_logits], dim=1))
multii_logits
=
torch
.
concat
([
rel_logits
,
time_logits
,
auth_logits
],
dim
=
1
)
task_logits
=
(
a_wei
*
multii_logits
+
a_bias
).
sum
(
dim
=
1
,
keepdim
=
True
)
task_logits
=
torch
.
sigmoid
(
task_logits
)
#gate_time = (a_wei * multii_logits + wei_time).sum(dim=1, keepdim=True)
#gate_time = torch.sigmoid(gate_time)
#gate_auth = (a_wei * multii_logits + wei_auth).sum(dim=1, keepdim=True)
#gate_auth = torch.sigmoid(gate_auth)
sat_logits_new
=
task_logits
*
sat_logits
#logits = 2.0 * sat_logits_new.detach() + 0.25 * (qfeat[:,0].float().unsqueeze(1)) * gate_time * time_logits.detach() + 0.5 * (qfeat[:,1].float().unsqueeze(1) + 0.4) * gate_auth * auth_logits.detach()
logits
=
sat_logits_new
reward
=
logits
.
squeeze
(
-
1
)
else
:
reward
=
pooled_output
[:,
0
].
squeeze
(
-
1
)
sat_logits
=
pooled_output_sat
[:,
-
1
]
auth_logits
=
pooled_output_auth
[:,
-
2
]
time_logits
=
pooled_output_time
[:,
-
3
]
rel_logits
=
pooled_output_rel
[:,
-
4
]
multii_logits
=
torch
.
concat
([
rel_logits
,
time_logits
,
auth_logits
],
dim
=
1
)
task_logits
=
(
a_wei
*
multii_logits
+
a_bias
).
sum
(
dim
=
1
,
keepdim
=
True
)
task_logits
=
torch
.
sigmoid
(
task_logits
)
sat_logits_new
=
task_logits
*
sat_logits
logits
=
sat_logits_new
reward
=
logits
# sat_logits = pooled_output_sat[torch.arange(batch_size, device=pooled_output.device), seq_length-1]
# auth_logits = pooled_output_auth[torch.arange(batch_size, device=pooled_output.device), seq_length-2]
# time_logits = pooled_output_time[torch.arange(batch_size, device=pooled_output.device), seq_length-3]
# rel_logits = pooled_output_rel[torch.arange(batch_size, device=pooled_output.device), seq_length-4]
# # a_score = torch.sigmoid(torch.concat([rel_logits, time_logits, auth_logits], dim=1))
# multii_logits = torch.concat([rel_logits, time_logits, auth_logits], dim=1)
# task_logits = (a_wei * multii_logits + a_bias).sum(dim=1, keepdim=True)
# task_logits = torch.sigmoid(task_logits)
# #gate_time = (a_wei * multii_logits + wei_time).sum(dim=1, keepdim=True)
# #gate_time = torch.sigmoid(gate_time)
# #gate_auth = (a_wei * multii_logits + wei_auth).sum(dim=1, keepdim=True)
# #gate_auth = torch.sigmoid(gate_auth)
# sat_logits_new = task_logits * sat_logits
# #logits = 2.0 * sat_logits_new.detach() + 0.25 * (qfeat[:,0].float().unsqueeze(1)) * gate_time * time_logits.detach() + 0.5 * (qfeat[:,1].float().unsqueeze(1) + 0.4) * gate_auth * auth_logits.detach()
# logits = sat_logits_new
# reward = logits.squeeze(-1)
return
reward
return
pooled_output
#reward
def
forward
(
self
,
...
...
@@ -768,4 +788,3 @@ def seq_cls_model_loader(model, weights: Iterable[tuple[str, torch.Tensor]]):
assert
method
in
SEQ_CLS_LOAD_METHODS
,
f
"method
{
method
}
not supported"
return
SEQ_CLS_LOAD_METHODS
[
method
](
model
,
weights
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment