Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
61 additions
and
3 deletions
+61
-3
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+5
-0
vllm/platforms/tpu.py
vllm/platforms/tpu.py
+2
-0
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+22
-3
vllm/plugins/__init__.py
vllm/plugins/__init__.py
+2
-0
vllm/pooling_params.py
vllm/pooling_params.py
+2
-0
vllm/profiler/__init__.py
vllm/profiler/__init__.py
+2
-0
vllm/profiler/layerwise_profile.py
vllm/profiler/layerwise_profile.py
+2
-0
vllm/profiler/utils.py
vllm/profiler/utils.py
+2
-0
vllm/prompt_adapter/layers.py
vllm/prompt_adapter/layers.py
+2
-0
vllm/prompt_adapter/models.py
vllm/prompt_adapter/models.py
+2
-0
vllm/prompt_adapter/request.py
vllm/prompt_adapter/request.py
+2
-0
vllm/prompt_adapter/utils.py
vllm/prompt_adapter/utils.py
+2
-0
vllm/prompt_adapter/worker_manager.py
vllm/prompt_adapter/worker_manager.py
+2
-0
vllm/sampling_params.py
vllm/sampling_params.py
+1
-0
vllm/scalar_type.py
vllm/scalar_type.py
+2
-0
vllm/scripts.py
vllm/scripts.py
+2
-0
vllm/sequence.py
vllm/sequence.py
+1
-0
vllm/spec_decode/batch_expansion.py
vllm/spec_decode/batch_expansion.py
+2
-0
vllm/spec_decode/draft_model_runner.py
vllm/spec_decode/draft_model_runner.py
+2
-0
vllm/spec_decode/interfaces.py
vllm/spec_decode/interfaces.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
vllm/platforms/rocm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
from
functools
import
lru_cache
from
typing
import
TYPE_CHECKING
,
Dict
,
List
,
Optional
...
...
@@ -77,6 +79,9 @@ class RocmPlatform(Platform):
def
get_attn_backend_cls
(
cls
,
selected_backend
,
head_size
,
dtype
,
kv_cache_dtype
,
block_size
,
use_v1
,
use_mla
)
->
str
:
if
use_mla
:
logger
.
info
(
"Using Triton MLA backend."
)
return
"vllm.attention.backends.triton_mla.TritonMLABackend"
selected_backend
=
(
_Backend
.
ROCM_FLASH
if
selected_backend
==
_Backend
.
FLASH_ATTN
else
selected_backend
)
if
selected_backend
==
_Backend
.
ROCM_FLASH
:
...
...
vllm/platforms/tpu.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
TYPE_CHECKING
,
Optional
import
torch
...
...
vllm/platforms/xpu.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
TYPE_CHECKING
,
Optional
import
torch
...
...
@@ -66,9 +68,14 @@ class XPUPlatform(Platform):
# check and update model config
model_config
=
vllm_config
.
model_config
if
model_config
.
dtype
==
torch
.
bfloat16
:
logger
.
warning
(
"bfloat16 is not fully supported on XPU, casting to float16."
)
model_config
.
dtype
=
torch
.
float16
bf16_supported
=
cls
.
device_support_bf16
()
if
not
bf16_supported
:
logger
.
warning
(
"bfloat16 is only supported on Intel Data Center GPU, "
"Intel Arc GPU is not supported yet. Your device is %s,"
"which is not supported. will fallback to float16"
,
cls
.
get_device_name
())
model_config
.
dtype
=
torch
.
float16
if
not
model_config
.
enforce_eager
:
logger
.
warning
(
"CUDA graph is not supported on XPU, fallback to the eager "
...
...
@@ -116,3 +123,15 @@ class XPUPlatform(Platform):
)
->
float
:
torch
.
xpu
.
reset_peak_memory_stats
(
device
)
return
torch
.
xpu
.
max_memory_allocated
(
device
)
@classmethod
def device_support_bf16(cls) -> bool:
    """Report whether the current device is treated as bfloat16-capable.

    The decision is based solely on the lower-cased string returned by
    ``cls.get_device_name()``:

    * a name containing ``"arc"`` yields ``False``;
    * otherwise, a name containing ``"data center gpu"`` yields ``True``;
    * any other name logs a warning and yields ``False``.

    Returns:
        ``True`` only when the device name contains ``"data center gpu"``
        and does not contain ``"arc"``; ``False`` in every other case.
    """
    device_name = cls.get_device_name().lower()

    # Substring membership (`in`) states the intent directly and stops at
    # the first match, unlike `str.count(...) > 0`, which scans the whole
    # string just to compare the tally against zero.
    # The "arc" check stays first so it wins if both substrings appear.
    if "arc" in device_name:
        return False
    if "data center gpu" in device_name:
        return True

    # Unrecognised device: take the conservative answer and say so.
    logger.warning("Unknown device name %s, always use float16",
                   device_name)
    return False
vllm/plugins/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
logging
import
os
from
typing
import
Callable
,
Dict
...
...
vllm/pooling_params.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
Optional
import
msgspec
...
...
vllm/profiler/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
.layerwise_profile
import
layerwise_profile
__all__
=
[
...
...
vllm/profiler/layerwise_profile.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
copy
from
collections
import
defaultdict
from
dataclasses
import
asdict
,
dataclass
,
field
...
...
vllm/profiler/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
dataclasses
from
typing
import
Callable
,
Dict
,
List
,
Type
,
Union
...
...
vllm/prompt_adapter/layers.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
from
typing
import
Optional
...
...
vllm/prompt_adapter/models.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
logging
import
math
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Type
...
...
vllm/prompt_adapter/request.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
msgspec
from
vllm.adapter_commons.request
import
AdapterRequest
...
...
vllm/prompt_adapter/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# code borrowed from: https://github.com/huggingface/peft/blob/v0.12.0/src/peft/utils/save_and_load.py#L420
import
os
...
...
vllm/prompt_adapter/worker_manager.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
logging
from
typing
import
Any
,
Optional
,
Set
,
Type
...
...
vllm/sampling_params.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Sampling parameters for text generation."""
import
copy
from
dataclasses
import
dataclass
...
...
vllm/scalar_type.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
functools
import
struct
from
dataclasses
import
dataclass
...
...
vllm/scripts.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# The CLI entrypoint to vLLM.
import
argparse
import
os
...
...
vllm/sequence.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Sequence and its related classes."""
import
copy
import
enum
...
...
vllm/spec_decode/batch_expansion.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
array
import
array
from
itertools
import
chain
,
count
from
typing
import
Iterator
,
List
,
Optional
,
Tuple
...
...
vllm/spec_decode/draft_model_runner.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
import
torch
...
...
vllm/spec_decode/interfaces.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
abc
import
ABC
,
abstractmethod
from
dataclasses
import
dataclass
from
typing
import
List
,
Optional
,
Set
,
Union
...
...
Prev
1
…
45
46
47
48
49
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment