[Minor] Add some clarifying comments to recent changes (#27130)

Signed-off-by: Nick Hill <nhill@redhat.com>

[Minor] Add some clarifying comments to recent changes (#27130)
Signed-off-by: Nick Hill <nhill@redhat.com>
3b450752 · Nick Hill · GitHub · 168e578e · 3b450752 · 3b450752
Unverified Commit 3b450752 authored Oct 18, 2025 by Nick Hill Committed by GitHub Oct 18, 2025
Showing with 10 additions and 2 deletions

vllm/distributed/device_communicators/shm_broadcast.py vllm/distributed/device_communicators/shm_broadcast.py +7 -1

vllm/v1/core/sched/output.py vllm/v1/core/sched/output.py +3 -1

No files found.
--- a/vllm/distributed/device_communicators/shm_broadcast.py
+++ b/vllm/distributed/device_communicators/shm_broadcast.py
@@ -236,7 +236,9 @@ class MessageQueue:
        n_reader,  # number of all readers
        n_local_reader,  # number of local readers through shared memory
        local_reader_ranks: list[int] | None = None,
-        max_chunk_bytes: int = 1024 * 1024 * 24,  # 24MiB
+        # Default of 24MiB chosen to be large enough to accommodate grammar
+        # bitmask tensors for large batches (1024 requests).
+        max_chunk_bytes: int = 1024 * 1024 * 24,
        max_chunks: int = 10,
        connect_ip: str | None = None,
    ):
@@ -538,6 +540,10 @@ class MessageQueue:
                    buf[0] = 1  # overflow
                self.local_socket.send_multipart(all_buffers, copy=False)
            else:
+                # Byte 0: 0
+                # Bytes 1-2: Count of buffers
+                # Then each buffer follows, preceded by 4 bytes containing its length:
+                # [4 byte int L][L bytes of buffer content] ...
                with self.acquire_write(timeout) as buf:
                    buf[0] = 0  # not overflow
                    offset = 3

--- a/vllm/v1/core/sched/output.py
+++ b/vllm/v1/core/sched/output.py
@@ -165,7 +165,9 @@ class SchedulerOutput:
    # freed from the encoder cache.
    free_encoder_mm_hashes: list[str]
-    # ids of structured outputs requests included in the bitmask, in order.
+    # ids of structured outputs requests included in the bitmask, in the
+    # same order as the corresponding stacked rows of the bitmask.
+    # There may be more than one row per request in the case of speculative decoding.
    structured_output_request_ids: list[str]
    # the bitmask for the whole batch
    grammar_bitmask: "npt.NDArray[np.int32] | None"