Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
05970c77
Unverified
Commit
05970c77
authored
Feb 26, 2026
by
Wentao Ye
Committed by
GitHub
Feb 26, 2026
Browse files
[Refactor] Remove dead code for attention benchmark script (#35418)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
d9406076
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
95 deletions
+0
-95
benchmarks/attention_benchmarks/__init__.py
benchmarks/attention_benchmarks/__init__.py
+0
-2
benchmarks/attention_benchmarks/common.py
benchmarks/attention_benchmarks/common.py
+0
-93
No files found.
benchmarks/attention_benchmarks/__init__.py
View file @
05970c77
...
@@ -15,7 +15,6 @@ from .common import (
...
@@ -15,7 +15,6 @@ from .common import (
BenchmarkConfig
,
BenchmarkConfig
,
BenchmarkResult
,
BenchmarkResult
,
MockLayer
,
MockLayer
,
MockModelConfig
,
ResultsFormatter
,
ResultsFormatter
,
get_attention_scale
,
get_attention_scale
,
is_mla_backend
,
is_mla_backend
,
...
@@ -36,7 +35,6 @@ __all__ = [
...
@@ -36,7 +35,6 @@ __all__ = [
"ResultsFormatter"
,
"ResultsFormatter"
,
# Mock objects
# Mock objects
"MockLayer"
,
"MockLayer"
,
"MockModelConfig"
,
# Utilities
# Utilities
"setup_mla_dims"
,
"setup_mla_dims"
,
"get_attention_scale"
,
"get_attention_scale"
,
...
...
benchmarks/attention_benchmarks/common.py
View file @
05970c77
...
@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass
...
@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Any
from
typing
import
Any
import
numpy
as
np
import
torch
import
torch
from
batch_spec
import
get_batch_type
,
parse_batch_spec
from
batch_spec
import
get_batch_type
,
parse_batch_spec
from
rich.console
import
Console
from
rich.console
import
Console
...
@@ -62,10 +61,7 @@ class MockHfConfig:
...
@@ -62,10 +61,7 @@ class MockHfConfig:
# Import AttentionLayerBase at module level to avoid circular dependencies
# Import AttentionLayerBase at module level to avoid circular dependencies
try
:
try
:
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
from
vllm.model_executor.layers.attention_layer_base
import
AttentionLayerBase
_HAS_ATTENTION_LAYER_BASE
=
True
except
ImportError
:
except
ImportError
:
_HAS_ATTENTION_LAYER_BASE
=
False
AttentionLayerBase
=
object
# Fallback
AttentionLayerBase
=
object
# Fallback
...
@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase):
...
@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase):
return
self
.
_kv_cache_spec
return
self
.
_kv_cache_spec
class
MockModelConfig
:
"""Mock model configuration."""
def
__init__
(
self
,
num_q_heads
:
int
,
num_kv_heads
:
int
,
head_dim
:
int
,
dtype
:
torch
.
dtype
=
torch
.
float16
,
max_model_len
:
int
=
32768
,
):
self
.
_n_q
=
num_q_heads
self
.
_n_kv
=
num_kv_heads
self
.
_d
=
head_dim
self
.
dtype
=
dtype
self
.
max_model_len
=
max_model_len
def
get_num_attention_heads
(
self
,
_
=
None
)
->
int
:
return
self
.
_n_q
def
get_num_kv_heads
(
self
,
_
=
None
)
->
int
:
return
self
.
_n_kv
def
get_head_size
(
self
)
->
int
:
return
self
.
_d
def
get_num_layers
(
self
)
->
int
:
"""Mock method for layer count queries."""
return
1
def
get_sliding_window_for_layer
(
self
,
_layer_idx
:
int
):
"""Mock method for sliding window queries."""
return
None
def
get_logits_soft_cap_for_layer
(
self
,
_layer_idx
:
int
):
"""Mock method for logits soft cap queries."""
return
None
def
get_sm_scale_for_layer
(
self
,
_layer_idx
:
int
)
->
float
:
"""Mock method for SM scale queries."""
return
1.0
/
(
self
.
get_head_size
()
**
0.5
)
class
MockParallelConfig
:
"""Mock parallel configuration."""
pass
class
MockCompilationConfig
:
"""Mock compilation configuration."""
def
__init__
(
self
):
self
.
full_cuda_graph
=
False
self
.
static_forward_context
=
{}
class
MockVLLMConfig
:
"""Mock VLLM configuration."""
def
__init__
(
self
):
self
.
compilation_config
=
MockCompilationConfig
()
class
MockRunner
:
"""Mock GPU runner for metadata builders."""
def
__init__
(
self
,
seq_lens
:
np
.
ndarray
,
query_start_locs
:
np
.
ndarray
,
device
:
torch
.
device
,
num_q_heads
:
int
,
num_kv_heads
:
int
,
head_dim
:
int
,
dtype
:
torch
.
dtype
,
):
self
.
model_config
=
MockModelConfig
(
num_q_heads
,
num_kv_heads
,
head_dim
,
dtype
)
self
.
parallel_config
=
MockParallelConfig
()
self
.
vllm_config
=
MockVLLMConfig
()
self
.
seq_lens_np
=
seq_lens
self
.
query_start_loc_np
=
query_start_locs
self
.
device
=
device
self
.
attention_chunk_size
=
None
self
.
num_query_heads
=
num_q_heads
self
.
num_kv_heads
=
num_kv_heads
self
.
dtype
=
dtype
@
dataclass
@
dataclass
class
ParameterSweep
:
class
ParameterSweep
:
"""Configuration for sweeping a backend parameter."""
"""Configuration for sweeping a backend parameter."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment