[Refactor] Remove dead code for attention benchmark script (#35418)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Refactor] Remove dead code for attention benchmark script (#35418)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
05970c77 · Wentao Ye · GitHub · d9406076 · 05970c77 · 05970c77
Unverified Commit 05970c77 authored Feb 26, 2026 by Wentao Ye Committed by GitHub Feb 26, 2026
Showing with 0 additions and 95 deletions

benchmarks/attention_benchmarks/__init__.py benchmarks/attention_benchmarks/__init__.py +0 -2

benchmarks/attention_benchmarks/common.py benchmarks/attention_benchmarks/common.py +0 -93

No files found.
--- a/benchmarks/attention_benchmarks/__init__.py
+++ b/benchmarks/attention_benchmarks/__init__.py
@@ -15,7 +15,6 @@ from .common import (
    BenchmarkConfig,
    BenchmarkResult,
    MockLayer,
-    MockModelConfig,
    ResultsFormatter,
    get_attention_scale,
    is_mla_backend,
@@ -36,7 +35,6 @@ __all__ = [
    "ResultsFormatter",
    # Mock objects
    "MockLayer",
-    "MockModelConfig",
    # Utilities
    "setup_mla_dims",
    "get_attention_scale",

--- a/benchmarks/attention_benchmarks/common.py
+++ b/benchmarks/attention_benchmarks/common.py
@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Any

-import numpy as np
 import torch
 from batch_spec import get_batch_type, parse_batch_spec
 from rich.console import Console
@@ -62,10 +61,7 @@ class MockHfConfig:
 # Import AttentionLayerBase at module level to avoid circular dependencies
 try:
    from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
-
-    _HAS_ATTENTION_LAYER_BASE = True
 except ImportError:
-    _HAS_ATTENTION_LAYER_BASE = False
    AttentionLayerBase = object  # Fallback


@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase):
        return self._kv_cache_spec


-class MockModelConfig:
-    """Mock model configuration."""
-
-    def __init__(
-        self,
-        num_q_heads: int,
-        num_kv_heads: int,
-        head_dim: int,
-        dtype: torch.dtype = torch.float16,
-        max_model_len: int = 32768,
-    ):
-        self._n_q = num_q_heads
-        self._n_kv = num_kv_heads
-        self._d = head_dim
-        self.dtype = dtype
-        self.max_model_len = max_model_len
-
-    def get_num_attention_heads(self, _=None) -> int:
-        return self._n_q
-
-    def get_num_kv_heads(self, _=None) -> int:
-        return self._n_kv
-
-    def get_head_size(self) -> int:
-        return self._d
-
-    def get_num_layers(self) -> int:
-        """Mock method for layer count queries."""
-        return 1
-
-    def get_sliding_window_for_layer(self, _layer_idx: int):
-        """Mock method for sliding window queries."""
-        return None
-
-    def get_logits_soft_cap_for_layer(self, _layer_idx: int):
-        """Mock method for logits soft cap queries."""
-        return None
-
-    def get_sm_scale_for_layer(self, _layer_idx: int) -> float:
-        """Mock method for SM scale queries."""
-        return 1.0 / (self.get_head_size() ** 0.5)
-
-
-class MockParallelConfig:
-    """Mock parallel configuration."""
-
-    pass
-
-
-class MockCompilationConfig:
-    """Mock compilation configuration."""
-
-    def __init__(self):
-        self.full_cuda_graph = False
-        self.static_forward_context = {}
-
-
-class MockVLLMConfig:
-    """Mock VLLM configuration."""
-
-    def __init__(self):
-        self.compilation_config = MockCompilationConfig()
-
-
-class MockRunner:
-    """Mock GPU runner for metadata builders."""
-
-    def __init__(
-        self,
-        seq_lens: np.ndarray,
-        query_start_locs: np.ndarray,
-        device: torch.device,
-        num_q_heads: int,
-        num_kv_heads: int,
-        head_dim: int,
-        dtype: torch.dtype,
-    ):
-        self.model_config = MockModelConfig(num_q_heads, num_kv_heads, head_dim, dtype)
-        self.parallel_config = MockParallelConfig()
-        self.vllm_config = MockVLLMConfig()
-        self.seq_lens_np = seq_lens
-        self.query_start_loc_np = query_start_locs
-        self.device = device
-        self.attention_chunk_size = None
-        self.num_query_heads = num_q_heads
-        self.num_kv_heads = num_kv_heads
-        self.dtype = dtype
-
-
 @dataclass
 class ParameterSweep:
    """Configuration for sweeping a backend parameter."""