Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cf16c82a
Commit
cf16c82a
authored
Sep 29, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds
parents
50bed026
484fcfca
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
6 deletions
+8
-6
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
...ted/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+6
-5
vllm/distributed/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py
...ted/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py
+1
-1
vllm/platforms/rocm.py
vllm/platforms/rocm.py
+1
-0
No files found.
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
View file @
cf16c82a
...
@@ -55,11 +55,6 @@ class ReqMeta:
...
@@ -55,11 +55,6 @@ class ReqMeta:
slot_mapping
=
slot_mapping
,
slot_mapping
=
slot_mapping
,
)
)
self
.
parallel_config
=
vllm_config
.
parallel_config
self
.
model_config
=
vllm_config
.
model_config
self
.
total_num_hidden_layers
=
getattr
(
self
.
model_config
.
hf_text_config
,
"num_hidden_layers"
,
0
)
self
.
pp_size
=
self
.
parallel_config
.
pipeline_parallel_size
@
dataclass
@
dataclass
class
P2pNcclConnectorMetadata
(
KVConnectorMetadata
):
class
P2pNcclConnectorMetadata
(
KVConnectorMetadata
):
...
@@ -100,6 +95,12 @@ class P2pNcclConnector(KVConnectorBase_V1):
...
@@ -100,6 +95,12 @@ class P2pNcclConnector(KVConnectorBase_V1):
hostname
=
""
,
hostname
=
""
,
port_offset
=
self
.
_rank
,
port_offset
=
self
.
_rank
,
)
if
role
==
KVConnectorRole
.
WORKER
else
None
)
if
role
==
KVConnectorRole
.
WORKER
else
None
self
.
parallel_config
=
vllm_config
.
parallel_config
self
.
model_config
=
vllm_config
.
model_config
self
.
total_num_hidden_layers
=
getattr
(
self
.
model_config
.
hf_text_config
,
"num_hidden_layers"
,
0
)
self
.
pp_size
=
self
.
parallel_config
.
pipeline_parallel_size
# ==============================
# ==============================
# Worker-side methods
# Worker-side methods
...
...
vllm/distributed/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py
View file @
cf16c82a
...
@@ -63,7 +63,7 @@ class TensorMemoryPool:
...
@@ -63,7 +63,7 @@ class TensorMemoryPool:
than min_block_size
than min_block_size
"""
"""
def
__init__
(
self
,
max_block_size
:
int
,
min_block_size
:
int
=
5
12
):
def
__init__
(
self
,
max_block_size
:
int
,
min_block_size
:
int
=
12
8
):
if
max_block_size
<=
0
or
min_block_size
<=
0
:
if
max_block_size
<=
0
or
min_block_size
<=
0
:
raise
ValueError
(
"Block sizes must be positive"
)
raise
ValueError
(
"Block sizes must be positive"
)
if
max_block_size
<
min_block_size
:
if
max_block_size
<
min_block_size
:
...
...
vllm/platforms/rocm.py
View file @
cf16c82a
...
@@ -279,6 +279,7 @@ class RocmPlatform(Platform):
...
@@ -279,6 +279,7 @@ class RocmPlatform(Platform):
logger
.
info_once
(
"Using Flash Attention backend on V1 engine. (only supports block size 64)"
)
logger
.
info_once
(
"Using Flash Attention backend on V1 engine. (only supports block size 64)"
)
return
FLASH_ATTN_V1
return
FLASH_ATTN_V1
else
:
else
:
os
.
environ
[
'VLLM_USE_FLASH_ATTN_PA'
]
=
'0'
logger
.
info_once
(
"Using Triton backend on V1 engine."
)
logger
.
info_once
(
"Using Triton backend on V1 engine."
)
return
TRITON_ATTN_VLLM_V1
return
TRITON_ATTN_VLLM_V1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment