Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6d172ab4
Commit
6d172ab4
authored
Jun 27, 2025
by
王敏
Browse files
1.临时解决gemm调用到blaslt问题
2.支持设置v1的chunked_prefill开关
parent
fdda4d82
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
25 additions
and
8 deletions
+25
-8
vllm/config.py
vllm/config.py
+5
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+7
-2
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+12
-6
vllm/worker/worker_base.py
vllm/worker/worker_base.py
+1
-0
No files found.
vllm/config.py
View file @
6d172ab4
...
@@ -423,6 +423,10 @@ class ModelConfig:
...
@@ -423,6 +423,10 @@ class ModelConfig:
- "vllm" will use the vLLM model implementation.
\n
- "vllm" will use the vLLM model implementation.
\n
- "transformers" will use the Transformers model implementation."""
- "transformers" will use the Transformers model implementation."""
enable_chunked_prefill
:
Optional
[
bool
]
=
None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
def
compute_hash
(
self
)
->
str
:
def
compute_hash
(
self
)
->
str
:
"""
"""
WARNING: Whenever a new field is added to this config,
WARNING: Whenever a new field is added to this config,
...
@@ -452,6 +456,7 @@ class ModelConfig:
...
@@ -452,6 +456,7 @@ class ModelConfig:
factors
.
append
(
self
.
rope_theta
)
factors
.
append
(
self
.
rope_theta
)
# hf_config can control how the model looks!
# hf_config can control how the model looks!
factors
.
append
(
self
.
hf_config
.
to_json_string
())
factors
.
append
(
self
.
hf_config
.
to_json_string
())
factors
.
append
(
self
.
enable_chunked_prefill
)
str_factors
=
str
(
factors
)
str_factors
=
str
(
factors
)
assert_hashable
(
str_factors
)
assert_hashable
(
str_factors
)
return
hashlib
.
sha256
(
str
(
factors
).
encode
()).
hexdigest
()
return
hashlib
.
sha256
(
str
(
factors
).
encode
()).
hexdigest
()
...
...
vllm/engine/arg_utils.py
View file @
6d172ab4
...
@@ -956,6 +956,7 @@ class EngineArgs:
...
@@ -956,6 +956,7 @@ class EngineArgs:
override_generation_config
=
self
.
override_generation_config
,
override_generation_config
=
self
.
override_generation_config
,
enable_sleep_mode
=
self
.
enable_sleep_mode
,
enable_sleep_mode
=
self
.
enable_sleep_mode
,
model_impl
=
self
.
model_impl
,
model_impl
=
self
.
model_impl
,
enable_chunked_prefill
=
self
.
enable_chunked_prefill
)
)
def
create_load_config
(
self
)
->
LoadConfig
:
def
create_load_config
(
self
)
->
LoadConfig
:
...
@@ -1046,7 +1047,7 @@ class EngineArgs:
...
@@ -1046,7 +1047,7 @@ class EngineArgs:
# Set default arguments for V0 or V1 Engine.
# Set default arguments for V0 or V1 Engine.
if
use_v1
:
if
use_v1
:
self
.
_set_default_args_v1
(
usage_context
)
self
.
_set_default_args_v1
(
usage_context
,
model_config
)
else
:
else
:
self
.
_set_default_args_v0
(
model_config
)
self
.
_set_default_args_v0
(
model_config
)
...
@@ -1532,12 +1533,16 @@ class EngineArgs:
...
@@ -1532,12 +1533,16 @@ class EngineArgs:
if
self
.
max_num_seqs
is
None
:
if
self
.
max_num_seqs
is
None
:
self
.
max_num_seqs
=
256
self
.
max_num_seqs
=
256
def
_set_default_args_v1
(
self
,
usage_context
:
UsageContext
)
->
None
:
def
_set_default_args_v1
(
self
,
usage_context
:
UsageContext
,
model_config
:
ModelConfig
)
->
None
:
"""Set Default Arguments for V1 Engine."""
"""Set Default Arguments for V1 Engine."""
# V1 always uses chunked prefills.
# V1 always uses chunked prefills.
self
.
enable_chunked_prefill
=
True
self
.
enable_chunked_prefill
=
True
if
model_config
.
enable_chunked_prefill
is
not
None
and
\
model_config
.
enable_chunked_prefill
is
False
:
self
.
enable_chunked_prefill
=
False
# V1 enables prefix caching by default.
# V1 enables prefix caching by default.
if
self
.
enable_prefix_caching
is
None
:
if
self
.
enable_prefix_caching
is
None
:
self
.
enable_prefix_caching
=
True
self
.
enable_prefix_caching
=
True
...
...
vllm/platforms/rocm.py
View file @
6d172ab4
...
@@ -100,6 +100,15 @@ def with_amdsmi_context(fn):
...
@@ -100,6 +100,15 @@ def with_amdsmi_context(fn):
return
wrapper
return
wrapper
def
device_id_to_physical_device_id
(
device_id
:
int
)
->
int
:
if
"CUDA_VISIBLE_DEVICES"
in
os
.
environ
:
device_ids
=
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
].
split
(
","
)
physical_device_id
=
device_ids
[
device_id
]
return
int
(
physical_device_id
)
else
:
return
device_id
@
cache
@
cache
def
on_gfx1x
()
->
bool
:
def
on_gfx1x
()
->
bool
:
GPU_ARCH
=
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
GPU_ARCH
=
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
...
@@ -396,13 +405,10 @@ class RocmPlatform(Platform):
...
@@ -396,13 +405,10 @@ class RocmPlatform(Platform):
@
with_amdsmi_context
@
with_amdsmi_context
@
lru_cache
(
maxsize
=
8
)
@
lru_cache
(
maxsize
=
8
)
def
get_device_name
(
cls
,
device_id
:
int
=
0
)
->
str
:
def
get_device_name
(
cls
,
device_id
:
int
=
0
)
->
str
:
physical_device_id
=
cls
.
device_id_to_physical_device_id
(
device_id
)
physical_device_id
=
device_id_to_physical_device_id
(
device_id
)
handle
=
amdsmi_get_processor_handles
()[
physical_device_id
]
handle
=
amdsmi_get_processor_handles
()[
physical_device_id
]
asic_info
=
amdsmi_get_gpu_asic_info
(
handle
)
# return amdsmi_get_gpu_asic_info(handle)["market_name"]
device_name
:
str
=
asic_info
[
"device_id"
]
return
torch
.
cuda
.
get_device_name
(
device_id
)
if
device_name
in
_ROCM_DEVICE_ID_NAME_MAP
:
return
_ROCM_DEVICE_ID_NAME_MAP
[
device_name
]
return
asic_info
[
"market_name"
]
@
classmethod
@
classmethod
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
def
get_device_total_memory
(
cls
,
device_id
:
int
=
0
)
->
int
:
...
...
vllm/worker/worker_base.py
View file @
6d172ab4
...
@@ -28,6 +28,7 @@ from vllm.worker.model_runner_base import (BroadcastableModelInput,
...
@@ -28,6 +28,7 @@ from vllm.worker.model_runner_base import (BroadcastableModelInput,
ModelRunnerBase
,
ModelRunnerBase
,
ModelRunnerInputBase
)
ModelRunnerInputBase
)
torch
.
_C
.
_set_blas_preferred_backend
(
torch
.
_C
.
_BlasBackend
.
Cublas
)
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment