Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
539aa992
Commit
539aa992
authored
Sep 27, 2024
by
zhuwenwen
Browse files
Merge tag 'v0.6.2' into v0.6.2-dev
parents
93872128
7193774b
Changes
383
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
15 deletions
+13
-15
vllm/worker/utils.py
vllm/worker/utils.py
+0
-8
vllm/worker/worker.py
vllm/worker/worker.py
+11
-5
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+2
-2
No files found.
vllm/worker/utils.py
View file @
539aa992
...
...
@@ -39,18 +39,10 @@ def assert_enc_dec_mr_supported_scenario(
raise
NotImplementedError
(
STR_NOT_IMPL_ENC_DEC_ERR_STRS
[
'STR_NOT_IMPL_ENC_DEC_PP'
])
if
enc_dec_mr
.
model_config
.
is_multimodal_model
:
raise
NotImplementedError
(
STR_NOT_IMPL_ENC_DEC_ERR_STRS
[
'STR_NOT_IMPL_ENC_DEC_MM'
])
if
enc_dec_mr
.
scheduler_config
.
num_lookahead_slots
>
0
:
raise
NotImplementedError
(
STR_NOT_IMPL_ENC_DEC_ERR_STRS
[
'STR_NOT_IMPL_ENC_DEC_SPEC_DEC'
])
if
not
enc_dec_mr
.
model_config
.
enforce_eager
:
raise
NotImplementedError
(
STR_NOT_IMPL_ENC_DEC_ERR_STRS
[
'STR_NOT_IMPL_ENC_DEC_CUDA_GRAPH'
])
if
enc_dec_mr
.
prompt_adapter_config
is
not
None
:
raise
NotImplementedError
(
STR_NOT_IMPL_ENC_DEC_ERR_STRS
[
'STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER'
])
vllm/worker/worker.py
View file @
539aa992
...
...
@@ -454,14 +454,20 @@ def init_worker_distributed_environment(
def
_check_if_gpu_supports_dtype
(
torch_dtype
:
torch
.
dtype
):
# Check if the GPU supports the dtype.
if
torch_dtype
==
torch
.
bfloat16
:
compute_capability
=
current_platform
.
get
_device_capability
(
)
if
comput
e_capability
[
0
]
<
8
:
if
torch_dtype
==
torch
.
bfloat16
:
# noqa: SIM102
if
not
current_platform
.
has
_device_capability
(
80
):
capability
=
current_platform
.
get_devic
e_capability
()
gpu_name
=
current_platform
.
get_device_name
()
if
capability
is
None
:
compute_str
=
"does not have a compute capability"
else
:
version_str
=
capability
.
as_version_str
()
compute_str
=
f
"has compute capability
{
version_str
}
"
raise
ValueError
(
"Bfloat16 is only supported on GPUs with compute capability "
f
"of at least 8.0. Your
{
gpu_name
}
GPU has compute capability "
f
"
{
compute_capability
[
0
]
}
.
{
compute_capability
[
1
]
}
. "
f
"of at least 8.0. Your
{
gpu_name
}
GPU
{
compute_str
}
. "
"You can use float16 instead by explicitly setting the"
"`dtype` flag in CLI, for example: --dtype=half."
)
...
...
vllm/worker/xpu_model_runner.py
View file @
539aa992
...
...
@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
MultiModalInputs
,
MultiModalRegistry
)
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.utils
import
Cuda
MemoryProfiler
,
make_tensor_with_pad
from
vllm.utils
import
Device
MemoryProfiler
,
make_tensor_with_pad
from
vllm.worker.model_runner
import
AttentionMetadata
,
SamplingMetadata
from
vllm.worker.model_runner_base
import
(
ModelRunnerBase
,
ModelRunnerInputBase
,
ModelRunnerInputBuilderBase
,
...
...
@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
self
.
model
:
nn
.
Module
# Set after init_Model
def
load_model
(
self
)
->
None
:
with
Cuda
MemoryProfiler
()
as
m
:
with
Device
MemoryProfiler
()
as
m
:
self
.
model
=
get_model
(
model_config
=
self
.
model_config
,
device_config
=
self
.
device_config
,
...
...
Prev
1
…
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment