Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
641fc5b7
Commit
641fc5b7
authored
Apr 14, 2025
by
zhuwenwen
Browse files
remove unused code
parent
b01efa0b
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
6 additions
and
152 deletions
+6
-152
vllm/attention/backends/blocksparse_attn.py
vllm/attention/backends/blocksparse_attn.py
+0
-1
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/rocm_flash_attn.py
+4
-6
vllm/config.py
vllm/config.py
+0
-1
vllm/distributed/device_communicators/custom_all_reduce.py
vllm/distributed/device_communicators/custom_all_reduce.py
+0
-2
vllm/lora/models.py
vllm/lora/models.py
+1
-0
vllm/multimodal/utils.py
vllm/multimodal/utils.py
+0
-2
vllm/platforms/cpu.py
vllm/platforms/cpu.py
+0
-4
vllm/platforms/interface.py
vllm/platforms/interface.py
+0
-5
vllm/transformers_utils/configs/qwen2vl.py
vllm/transformers_utils/configs/qwen2vl.py
+0
-131
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+1
-0
No files found.
vllm/attention/backends/blocksparse_attn.py
View file @
641fc5b7
...
@@ -13,7 +13,6 @@ from vllm.attention.backends.utils import (CommonAttentionState,
...
@@ -13,7 +13,6 @@ from vllm.attention.backends.utils import (CommonAttentionState,
from
vllm.attention.ops.blocksparse_attention.interface
import
(
from
vllm.attention.ops.blocksparse_attention.interface
import
(
LocalStridedBlockSparseAttn
,
get_head_sliding_step
)
LocalStridedBlockSparseAttn
,
get_head_sliding_step
)
from
vllm.attention.ops.paged_attn
import
PagedAttention
from
vllm.attention.ops.paged_attn
import
PagedAttention
from
vllm
import
_custom_ops
as
ops
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
get_tensor_model_parallel_world_size
)
...
...
vllm/attention/backends/rocm_flash_attn.py
View file @
641fc5b7
...
@@ -148,7 +148,6 @@ class ROCmFlashAttentionMetadata(AttentionMetadata, PagedAttentionMetadata):
...
@@ -148,7 +148,6 @@ class ROCmFlashAttentionMetadata(AttentionMetadata, PagedAttentionMetadata):
cross_slot_mapping
:
Optional
[
torch
.
Tensor
]
=
None
cross_slot_mapping
:
Optional
[
torch
.
Tensor
]
=
None
cross_block_tables
:
Optional
[
torch
.
Tensor
]
=
None
cross_block_tables
:
Optional
[
torch
.
Tensor
]
=
None
@
property
@
property
def
prefill_metadata
(
self
)
->
Optional
[
"ROCmFlashAttentionMetadata"
]:
def
prefill_metadata
(
self
)
->
Optional
[
"ROCmFlashAttentionMetadata"
]:
if
self
.
num_prefills
==
0
:
if
self
.
num_prefills
==
0
:
...
@@ -723,7 +722,6 @@ class ROCmFlashAttentionImpl(AttentionImpl):
...
@@ -723,7 +722,6 @@ class ROCmFlashAttentionImpl(AttentionImpl):
attn_masks
[
0
][
None
]
attn_masks
[
0
][
None
]
if
attn_masks
is
not
None
else
None
,
if
attn_masks
is
not
None
else
None
,
)
)
elif
self
.
use_naive_attn
:
elif
self
.
use_naive_attn
:
if
self
.
num_kv_heads
!=
self
.
num_heads
:
if
self
.
num_kv_heads
!=
self
.
num_heads
:
# Interleave for MQA workaround.
# Interleave for MQA workaround.
...
...
vllm/config.py
View file @
641fc5b7
...
@@ -2305,7 +2305,6 @@ class SpeculativeConfig:
...
@@ -2305,7 +2305,6 @@ class SpeculativeConfig:
f
"other value than 1 or target model tensor_parallel_size"
)
f
"other value than 1 or target model tensor_parallel_size"
)
return
speculative_draft_tensor_parallel_size
return
speculative_draft_tensor_parallel_size
@
staticmethod
@
staticmethod
def
create_draft_parallel_config
(
def
create_draft_parallel_config
(
target_parallel_config
:
ParallelConfig
,
target_parallel_config
:
ParallelConfig
,
...
...
vllm/distributed/device_communicators/custom_all_reduce.py
View file @
641fc5b7
...
@@ -19,7 +19,6 @@ from vllm.utils import cuda_device_count_stateless
...
@@ -19,7 +19,6 @@ from vllm.utils import cuda_device_count_stateless
try
:
try
:
ops
.
meta_size
()
ops
.
meta_size
()
custom_ar
=
True
custom_ar
=
True
except
Exception
:
except
Exception
:
# For CPUs
# For CPUs
custom_ar
=
False
custom_ar
=
False
...
@@ -130,7 +129,6 @@ class CustomAllreduce:
...
@@ -130,7 +129,6 @@ class CustomAllreduce:
# test nvlink first, this will filter out most of the cases
# test nvlink first, this will filter out most of the cases
# where custom allreduce is not supported
# where custom allreduce is not supported
# this checks hardware and driver support for NVLink
# this checks hardware and driver support for NVLink
assert
current_platform
.
is_cuda_alike
()
assert
current_platform
.
is_cuda_alike
()
fully_connected
=
current_platform
.
is_fully_connected
(
fully_connected
=
current_platform
.
is_fully_connected
(
physical_device_ids
)
physical_device_ids
)
...
...
vllm/lora/models.py
View file @
641fc5b7
...
@@ -341,6 +341,7 @@ class LoRAModelManager(AdapterModelManager):
...
@@ -341,6 +341,7 @@ class LoRAModelManager(AdapterModelManager):
# Used for long context lora.
# Used for long context lora.
self
.
scaling_factor_to_offset
:
Dict
[
float
,
int
]
=
{}
self
.
scaling_factor_to_offset
:
Dict
[
float
,
int
]
=
{}
super
().
__init__
(
model
)
super
().
__init__
(
model
)
self
.
supported_lora_modules
=
get_supported_lora_modules
(
self
.
model
)
self
.
supported_lora_modules
=
get_supported_lora_modules
(
self
.
model
)
assert
self
.
supported_lora_modules
,
"No supported LoRA modules found in"
assert
self
.
supported_lora_modules
,
"No supported LoRA modules found in"
f
"
{
self
.
model
.
__class__
.
__name__
}
."
f
"
{
self
.
model
.
__class__
.
__name__
}
."
...
...
vllm/multimodal/utils.py
View file @
641fc5b7
...
@@ -9,7 +9,6 @@ import numpy as np
...
@@ -9,7 +9,6 @@ import numpy as np
import
numpy.typing
as
npt
import
numpy.typing
as
npt
import
torch
import
torch
from
PIL
import
Image
from
PIL
import
Image
import
os
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.connections
import
HTTPConnection
,
global_http_connection
from
vllm.connections
import
HTTPConnection
,
global_http_connection
...
@@ -87,7 +86,6 @@ class MediaConnector:
...
@@ -87,7 +86,6 @@ class MediaConnector:
return
media_io
.
load_file
(
filepath
)
return
media_io
.
load_file
(
filepath
)
def
load_from_url
(
def
load_from_url
(
self
,
self
,
url
:
str
,
url
:
str
,
...
...
vllm/platforms/cpu.py
View file @
641fc5b7
...
@@ -32,10 +32,6 @@ class CpuPlatform(Platform):
...
@@ -32,10 +32,6 @@ class CpuPlatform(Platform):
def
get_device_name
(
cls
,
device_id
:
int
=
0
)
->
str
:
def
get_device_name
(
cls
,
device_id
:
int
=
0
)
->
str
:
return
"cpu"
return
"cpu"
@
classmethod
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
return
psutil
.
virtual_memory
().
total
@
classmethod
@
classmethod
def
get_attn_backend_cls
(
cls
,
selected_backend
:
_Backend
,
head_size
:
int
,
def
get_attn_backend_cls
(
cls
,
selected_backend
:
_Backend
,
head_size
:
int
,
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
Optional
[
str
],
dtype
:
torch
.
dtype
,
kv_cache_dtype
:
Optional
[
str
],
...
...
vllm/platforms/interface.py
View file @
641fc5b7
...
@@ -203,11 +203,6 @@ class Platform:
...
@@ -203,11 +203,6 @@ class Platform:
"""
"""
raise
NotImplementedError
raise
NotImplementedError
@
classmethod
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
"""Get the total memory of a device in bytes."""
raise
NotImplementedError
@
classmethod
@
classmethod
def
inference_mode
(
cls
):
def
inference_mode
(
cls
):
"""A device-specific wrapper of `torch.inference_mode`.
"""A device-specific wrapper of `torch.inference_mode`.
...
...
vllm/transformers_utils/configs/qwen2vl.py
deleted
100644 → 0
View file @
b01efa0b
# coding=utf-8
# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Qwen2VL model configuration"""
import
os
from
typing
import
Union
from
transformers
import
PretrainedConfig
class
Qwen2VLVisionConfig
(
PretrainedConfig
):
model_type
=
"qwen2_vl"
def
__init__
(
self
,
depth
=
32
,
embed_dim
=
1280
,
hidden_size
=
3584
,
hidden_act
=
"quick_gelu"
,
mlp_ratio
=
4
,
num_heads
=
16
,
in_channels
=
3
,
patch_size
=
14
,
spatial_merge_size
=
2
,
temporal_patch_size
=
2
,
**
kwargs
,
):
super
().
__init__
(
**
kwargs
)
self
.
depth
=
depth
self
.
embed_dim
=
embed_dim
self
.
hidden_size
=
hidden_size
self
.
hidden_act
=
hidden_act
self
.
mlp_ratio
=
mlp_ratio
self
.
num_heads
=
num_heads
self
.
in_channels
=
in_channels
self
.
patch_size
=
patch_size
self
.
spatial_merge_size
=
spatial_merge_size
self
.
temporal_patch_size
=
temporal_patch_size
@
classmethod
def
from_pretrained
(
cls
,
pretrained_model_name_or_path
:
Union
[
str
,
os
.
PathLike
],
**
kwargs
)
->
"PretrainedConfig"
:
cls
.
_set_token_in_kwargs
(
kwargs
)
config_dict
,
kwargs
=
cls
.
get_config_dict
(
pretrained_model_name_or_path
,
**
kwargs
)
if
config_dict
.
get
(
"model_type"
)
==
"qwen2_vl"
:
config_dict
=
config_dict
[
"vision_config"
]
return
cls
.
from_dict
(
config_dict
,
**
kwargs
)
class
Qwen2VLConfig
(
PretrainedConfig
):
def
__init__
(
self
,
vocab_size
=
152064
,
hidden_size
=
8192
,
intermediate_size
=
29568
,
num_hidden_layers
=
80
,
num_attention_heads
=
64
,
num_key_value_heads
=
8
,
hidden_act
=
"silu"
,
max_position_embeddings
=
32768
,
initializer_range
=
0.02
,
rms_norm_eps
=
1e-05
,
use_cache
=
True
,
tie_word_embeddings
=
False
,
rope_theta
=
1000000.0
,
use_sliding_window
=
False
,
sliding_window
=
4096
,
max_window_layers
=
80
,
attention_dropout
=
0.0
,
vision_config
=
None
,
rope_scaling
=
None
,
**
kwargs
,
):
if
isinstance
(
vision_config
,
dict
):
self
.
vision_config
=
Qwen2VLVisionConfig
(
**
vision_config
)
elif
vision_config
is
None
:
self
.
vision_config
=
Qwen2VLVisionConfig
()
self
.
vocab_size
=
vocab_size
self
.
max_position_embeddings
=
max_position_embeddings
self
.
hidden_size
=
hidden_size
self
.
intermediate_size
=
intermediate_size
self
.
num_hidden_layers
=
num_hidden_layers
self
.
num_attention_heads
=
num_attention_heads
self
.
use_sliding_window
=
use_sliding_window
self
.
sliding_window
=
sliding_window
self
.
max_window_layers
=
max_window_layers
# for backward compatibility
if
num_key_value_heads
is
None
:
num_key_value_heads
=
num_attention_heads
self
.
num_key_value_heads
=
num_key_value_heads
self
.
hidden_act
=
hidden_act
self
.
initializer_range
=
initializer_range
self
.
rms_norm_eps
=
rms_norm_eps
self
.
use_cache
=
use_cache
self
.
rope_theta
=
rope_theta
self
.
attention_dropout
=
attention_dropout
self
.
rope_scaling
=
rope_scaling
# NOTE: the following section from original transformers config
# for Qwen2-VL is commented out to address rope config loading issue
#
# if self.rope_scaling is not None and "type" in self.rope_scaling:
# if self.rope_scaling["type"] == "mrope":
# self.rope_scaling["type"] = "default"
# self.rope_scaling["rope_type"] = self.rope_scaling["type"]
# rope_config_validation(self)
super
().
__init__
(
tie_word_embeddings
=
tie_word_embeddings
,
**
kwargs
)
\ No newline at end of file
vllm/worker/model_runner.py
View file @
641fc5b7
...
@@ -352,6 +352,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
...
@@ -352,6 +352,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
prompt_adapter_prompt_mapping
prompt_adapter_prompt_mapping
else
:
else
:
self
.
prompt_adapter_prompt_mapping
.
clear
()
self
.
prompt_adapter_prompt_mapping
.
clear
()
else
:
else
:
self
.
input_tokens
=
input_tokens
or
[]
self
.
input_tokens
=
input_tokens
or
[]
self
.
input_positions
=
input_positions
or
[]
self
.
input_positions
=
input_positions
or
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment