Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
032d661d
Unverified
Commit
032d661d
authored
Sep 20, 2025
by
Wenlong Wang
Committed by
GitHub
Sep 20, 2025
Browse files
[Docs] Fix warnings in mkdocs build (continued) (#25042)
Signed-off-by:
wwl2755
<
wangwenlong2755@gmail.com
>
parent
e08a3a3f
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
24 additions
and
15 deletions
+24
-15
vllm/multimodal/__init__.py
vllm/multimodal/__init__.py
+1
-1
vllm/utils/__init__.py
vllm/utils/__init__.py
+3
-3
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/kv_cache_utils.py
+2
-2
vllm/v1/sample/rejection_sampler.py
vllm/v1/sample/rejection_sampler.py
+5
-5
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+1
-1
vllm/v1/worker/utils.py
vllm/v1/worker/utils.py
+2
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+10
-2
No files found.
vllm/multimodal/__init__.py
View file @
032d661d
...
@@ -15,7 +15,7 @@ is used by model runners to dispatch data processing according to the target
...
@@ -15,7 +15,7 @@ is used by model runners to dispatch data processing according to the target
model.
model.
Info:
Info:
[mm_processing](../../../design/mm_processing.
html
)
[mm_processing](../../../design/mm_processing.
md
)
"""
"""
__all__
=
[
__all__
=
[
...
...
vllm/utils/__init__.py
View file @
032d661d
...
@@ -3273,7 +3273,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
...
@@ -3273,7 +3273,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
and
getattr
(
cfg
.
attn_config
,
"alibi"
,
False
)))))
and
getattr
(
cfg
.
attn_config
,
"alibi"
,
False
)))))
def
sha256
(
input
)
->
bytes
:
def
sha256
(
input
:
Any
)
->
bytes
:
"""Hash any picklable Python object using SHA-256.
"""Hash any picklable Python object using SHA-256.
The input is serialized using pickle before hashing, which allows
The input is serialized using pickle before hashing, which allows
...
@@ -3290,7 +3290,7 @@ def sha256(input) -> bytes:
...
@@ -3290,7 +3290,7 @@ def sha256(input) -> bytes:
return
hashlib
.
sha256
(
input_bytes
).
digest
()
return
hashlib
.
sha256
(
input_bytes
).
digest
()
def
sha256_cbor
(
input
)
->
bytes
:
def
sha256_cbor
(
input
:
Any
)
->
bytes
:
"""
"""
Hash objects using CBOR serialization and SHA-256.
Hash objects using CBOR serialization and SHA-256.
...
...
vllm/v1/core/kv_cache_utils.py
View file @
032d661d
vllm/v1/sample/rejection_sampler.py
View file @
032d661d
...
@@ -351,17 +351,17 @@ def generate_uniform_probs(
...
@@ -351,17 +351,17 @@ def generate_uniform_probs(
without a seed.
without a seed.
Args:
Args:
num_tokens
: int
num_tokens: int
Total number of tokens.
Total number of tokens.
num_draft_tokens
: List[List[int]]
num_draft_tokens: List[List[int]]
Number of draft tokens per request.
Number of draft tokens per request.
generators
: Optional[Dict[int, torch.Generator]]
generators: Optional[Dict[int, torch.Generator]]
A dictionary mapping indices in the batch to
A dictionary mapping indices in the batch to
`torch.Generator` objects.
`torch.Generator` objects.
device
: torch.device
device: torch.device
The device on which to allocate the tensor.
The device on which to allocate the tensor.
Returns:
Returns:
uniform_rand
: torch.Tensor
uniform_rand: torch.Tensor
A tensor of shape `(num_tokens, )` containing uniform
A tensor of shape `(num_tokens, )` containing uniform
random values in the range [0, 1).
random values in the range [0, 1).
"""
"""
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
032d661d
vllm/v1/worker/utils.py
View file @
032d661d
...
@@ -205,7 +205,8 @@ def gather_mm_placeholders(
...
@@ -205,7 +205,8 @@ def gather_mm_placeholders(
"""
"""
Reconstructs the embeddings from the placeholder tokens.
Reconstructs the embeddings from the placeholder tokens.
This is the operation of [scatter_mm_placeholders][].
This is the operation of [`scatter_mm_placeholders`]
[vllm.v1.worker.utils.scatter_mm_placeholders].
"""
"""
if
is_embed
is
None
:
if
is_embed
is
None
:
return
placeholders
return
placeholders
...
...
vllm/worker/model_runner.py
View file @
032d661d
...
@@ -1810,7 +1810,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
...
@@ -1810,7 +1810,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
return
[
output
]
return
[
output
]
def
need_recv_kv
(
self
,
model_input
,
kv_caches
)
->
bool
:
def
need_recv_kv
(
self
,
model_input
:
ModelInputForGPUWithSamplingMetadata
,
kv_caches
:
List
[
torch
.
Tensor
])
->
bool
:
"""Check if we need to receive kv-cache from the other worker.
"""Check if we need to receive kv-cache from the other worker.
We need to receive KV when
We need to receive KV when
1. current vLLM instance is KV cache consumer/decode vLLM instance
1. current vLLM instance is KV cache consumer/decode vLLM instance
...
@@ -1825,6 +1826,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
...
@@ -1825,6 +1826,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
if
self
.
vllm_config
.
kv_transfer_config
is
None
:
if
self
.
vllm_config
.
kv_transfer_config
is
None
:
return
False
return
False
if
model_input
.
attn_metadata
is
None
:
raise
ValueError
(
"model_input.attn_metadata cannot be None"
)
prefill_meta
=
model_input
.
attn_metadata
.
prefill_metadata
prefill_meta
=
model_input
.
attn_metadata
.
prefill_metadata
# check if the current run is profiling
# check if the current run is profiling
...
@@ -1835,7 +1839,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
...
@@ -1835,7 +1839,8 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
return
self
.
vllm_config
.
kv_transfer_config
.
is_kv_consumer
and
(
return
self
.
vllm_config
.
kv_transfer_config
.
is_kv_consumer
and
(
not
is_profile_run
)
and
is_prefill_run
not
is_profile_run
)
and
is_prefill_run
def
need_send_kv
(
self
,
model_input
,
kv_caches
)
->
bool
:
def
need_send_kv
(
self
,
model_input
:
ModelInputForGPUWithSamplingMetadata
,
kv_caches
:
List
[
torch
.
Tensor
])
->
bool
:
"""Check if we need to send kv-cache to the other worker.
"""Check if we need to send kv-cache to the other worker.
We need to send KV when
We need to send KV when
1. current vLLM instance is KV cache producer/prefill vLLM instance
1. current vLLM instance is KV cache producer/prefill vLLM instance
...
@@ -1850,6 +1855,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
...
@@ -1850,6 +1855,9 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
if
self
.
vllm_config
.
kv_transfer_config
is
None
:
if
self
.
vllm_config
.
kv_transfer_config
is
None
:
return
False
return
False
if
model_input
.
attn_metadata
is
None
:
raise
ValueError
(
"model_input.attn_metadata cannot be None"
)
prefill_meta
=
model_input
.
attn_metadata
.
prefill_metadata
prefill_meta
=
model_input
.
attn_metadata
.
prefill_metadata
# check if the current run is profiling
# check if the current run is profiling
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment