Unverified commit 05970c77, authored by Wentao Ye and committed by GitHub
Browse files

[Refactor] Remove dead code for attention benchmark script (#35418)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent d9406076
...@@ -15,7 +15,6 @@ from .common import ( ...@@ -15,7 +15,6 @@ from .common import (
BenchmarkConfig, BenchmarkConfig,
BenchmarkResult, BenchmarkResult,
MockLayer, MockLayer,
MockModelConfig,
ResultsFormatter, ResultsFormatter,
get_attention_scale, get_attention_scale,
is_mla_backend, is_mla_backend,
...@@ -36,7 +35,6 @@ __all__ = [ ...@@ -36,7 +35,6 @@ __all__ = [
"ResultsFormatter", "ResultsFormatter",
# Mock objects # Mock objects
"MockLayer", "MockLayer",
"MockModelConfig",
# Utilities # Utilities
"setup_mla_dims", "setup_mla_dims",
"get_attention_scale", "get_attention_scale",
......
...@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass ...@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
import numpy as np
import torch import torch
from batch_spec import get_batch_type, parse_batch_spec from batch_spec import get_batch_type, parse_batch_spec
from rich.console import Console from rich.console import Console
...@@ -62,10 +61,7 @@ class MockHfConfig: ...@@ -62,10 +61,7 @@ class MockHfConfig:
# Import AttentionLayerBase at module level to avoid circular dependencies # Import AttentionLayerBase at module level to avoid circular dependencies
try: try:
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
_HAS_ATTENTION_LAYER_BASE = True
except ImportError: except ImportError:
_HAS_ATTENTION_LAYER_BASE = False
AttentionLayerBase = object # Fallback AttentionLayerBase = object # Fallback
...@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase): ...@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase):
return self._kv_cache_spec return self._kv_cache_spec
class MockModelConfig:
    """Mock model configuration.

    Holds the attention geometry supplied at construction time and exposes it
    through getter methods; the remaining per-layer queries return fixed
    values.

    Args:
        num_q_heads: Value reported by ``get_num_attention_heads``.
        num_kv_heads: Value reported by ``get_num_kv_heads``.
        head_dim: Value reported by ``get_head_size``.
        dtype: Stored unchanged on ``self.dtype``.
        max_model_len: Stored unchanged on ``self.max_model_len``.
    """

    def __init__(
        self,
        num_q_heads: int,
        num_kv_heads: int,
        head_dim: int,
        dtype: torch.dtype = torch.float16,
        max_model_len: int = 32768,
    ):
        self._n_q = num_q_heads
        self._n_kv = num_kv_heads
        self._d = head_dim
        self.dtype = dtype
        self.max_model_len = max_model_len

    def get_num_attention_heads(self, _=None) -> int:
        """Return the number of query heads; the optional argument is ignored."""
        return self._n_q

    def get_num_kv_heads(self, _=None) -> int:
        """Return the number of KV heads; the optional argument is ignored."""
        return self._n_kv

    def get_head_size(self) -> int:
        """Return the per-head dimension passed as ``head_dim``."""
        return self._d

    def get_num_layers(self) -> int:
        """Mock method for layer count queries."""
        return 1

    def get_sliding_window_for_layer(self, _layer_idx: int):
        """Mock method for sliding window queries."""
        return None

    def get_logits_soft_cap_for_layer(self, _layer_idx: int):
        """Mock method for logits soft cap queries."""
        return None

    def get_sm_scale_for_layer(self, _layer_idx: int) -> float:
        """Mock method for SM scale queries."""
        # Same value for every layer: 1 / sqrt(head_size).
        return 1.0 / (self.get_head_size() ** 0.5)
class MockParallelConfig:
    """Mock parallel configuration.

    Intentionally empty: it carries no attributes or methods and only serves
    as a placeholder object.
    """
class MockCompilationConfig:
    """Mock compilation configuration.

    Each instance starts with ``full_cuda_graph`` set to ``False`` and its own
    empty ``static_forward_context`` dictionary.
    """

    def __init__(self):
        self.full_cuda_graph: bool = False
        # A fresh dict per instance, so instances never share state.
        self.static_forward_context: dict = {}
class MockVLLMConfig:
    """Mock VLLM configuration.

    Exposes a single attribute, ``compilation_config``, holding a newly
    created ``MockCompilationConfig``.
    """

    def __init__(self):
        self.compilation_config = MockCompilationConfig()
class MockRunner:
    """Mock GPU runner for metadata builders.

    Bundles the mock configuration objects and the per-batch arrays on one
    object.

    Args:
        seq_lens: Stored unchanged on ``self.seq_lens_np``.
        query_start_locs: Stored unchanged on ``self.query_start_loc_np``.
        device: Stored unchanged on ``self.device``.
        num_q_heads: Stored on ``self.num_query_heads`` and forwarded to
            ``MockModelConfig``.
        num_kv_heads: Stored on ``self.num_kv_heads`` and forwarded to
            ``MockModelConfig``.
        head_dim: Forwarded to ``MockModelConfig`` only; not stored on the
            runner itself.
        dtype: Stored on ``self.dtype`` and forwarded to ``MockModelConfig``.
    """

    def __init__(
        self,
        seq_lens: np.ndarray,
        query_start_locs: np.ndarray,
        device: torch.device,
        num_q_heads: int,
        num_kv_heads: int,
        head_dim: int,
        dtype: torch.dtype,
    ):
        # Configuration objects; max_model_len keeps MockModelConfig's default.
        self.model_config = MockModelConfig(
            num_q_heads, num_kv_heads, head_dim, dtype=dtype
        )
        self.parallel_config = MockParallelConfig()
        self.vllm_config = MockVLLMConfig()

        # Per-batch inputs, kept exactly as passed in.
        self.seq_lens_np = seq_lens
        self.query_start_loc_np = query_start_locs
        self.device = device

        self.attention_chunk_size = None
        self.num_query_heads = num_q_heads
        self.num_kv_heads = num_kv_heads
        self.dtype = dtype
@dataclass @dataclass
class ParameterSweep: class ParameterSweep:
"""Configuration for sweeping a backend parameter.""" """Configuration for sweeping a backend parameter."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment