[Docs] Replace `rst` style double-backtick with `md` single-backtick (#27091)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[Docs] Replace `rst` style double-backtick with `md` single-backtick (#27091)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6c9fdbf7 · Harry Mellor · GitHub · 483ea646 · 6c9fdbf7 · 6c9fdbf7
Unverified Commit 6c9fdbf7 authored Oct 17, 2025 by Harry Mellor Committed by GitHub Oct 17, 2025
11 changed files
--- a/vllm/model_executor/parameter.py
+++ b/vllm/model_executor/parameter.py
@@ -70,7 +70,7 @@ class BasevLLMParameter(Parameter):
        # NOTE(@ksayers) some models such as mamba_mixer2 override the
        # weight loader to support custom loading. In the future, model-specific
        # weight loading should be implemented via Model.load_weights. In the
-        # meantime, support deleting and overriding `weight_loader`` attribute
+        # meantime, support deleting and overriding `weight_loader` attribute
        if self._weight_loader is None:
            raise AttributeError(
                f"{self.__class__.__name__} weight_loader attribute has been deleted"

--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -332,8 +332,8 @@ class PromptInsertion(PromptUpdate):

    Example:

-    For each image, insert a number of ``<image>`` feature placeholders
-    equal to the feature size of the vision encoder after the ``<s>`` token:
+    For each image, insert a number of `<image>` feature placeholders
+    equal to the feature size of the vision encoder after the `<s>` token:

    ```python
    PromptInsertion(
@@ -353,7 +353,7 @@ class PromptInsertion(PromptUpdate):
    )
    ```

-    Insert these tokens after a prefix ``Images:``:
+    Insert these tokens after a prefix `Images:`:

    ```python
    PromptInsertion(
@@ -401,8 +401,8 @@ class PromptReplacement(PromptUpdate):

    Example:

-    For each image, replace one ``<image>`` input placeholder in the prompt
-    with a number of ``<image>`` feature placeholders
+    For each image, replace one `<image>` input placeholder in the prompt
+    with a number of `<image>` feature placeholders
    equal to the feature size of the vision encoder:

    ```python
@@ -413,8 +413,8 @@ class PromptReplacement(PromptUpdate):
    )
    ```

-    As above, but further pad the feature placeholders with ``<image_bos>``
-    and `<image_eos>``, which are not supposed to be passed to the vision
+    As above, but further pad the feature placeholders with `<image_bos>`
+    and `<image_eos>`, which are not supposed to be passed to the vision
    encoder:

    ```python

--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -307,7 +307,7 @@ class MultiModalRegistry:
        """
        Create dummy data for profiling the memory usage of a model.

-        The model is identified by ``model_config``.
+        The model is identified by `model_config`.
        """
        processor = self.create_processor(model_config, cache=cache)
        profiler: MultiModalProfiler = MultiModalProfiler(processor)
@@ -340,7 +340,7 @@ class MultiModalRegistry:
        """
        Create dummy data for profiling the memory usage of a model.

-        The model is identified by ``model_config``.
+        The model is identified by `model_config`.
        """
        processor = self.create_processor(model_config, cache=cache)
        profiler: MultiModalProfiler = MultiModalProfiler(processor)

--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -75,7 +75,7 @@ _ROCM_DEVICE_ID_NAME_MAP: dict[str, str] = {
    "0x74bd": "AMD_Instinct_MI300X_HF",
 }

-# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES``
+# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES`
 if "HIP_VISIBLE_DEVICES" in os.environ:
    val = os.environ["HIP_VISIBLE_DEVICES"]
    if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None):

--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -168,7 +168,7 @@ class XPUPlatform(Platform):
                parallel_config.distributed_executor_backend = "uni"
        elif parallel_config.distributed_executor_backend == "mp":
            # FIXME(kunshang):
-            # spawn needs calling `if __name__ == '__main__':``
+            # spawn needs calling `if __name__ == '__main__':`
            # fork is not supported for xpu start new process.
            if envs.VLLM_WORKER_MULTIPROC_METHOD != "spawn":
                os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -306,10 +306,10 @@ class SamplingParams(
        )

    def __post_init__(self) -> None:
-        # how we deal with `best_of``:
-        # if `best_of`` is not set, we default to `n`;
-        # if `best_of`` is set, we set `n`` to `best_of`,
-        # and set `_real_n`` to the original `n`.
+        # how we deal with `best_of`:
+        # if `best_of` is not set, we default to `n`;
+        # if `best_of` is set, we set `n` to `best_of`,
+        # and set `_real_n` to the original `n`.
        # when we return the result, we will check
        # if we need to return `n` or `_real_n` results
        if self.best_of:

--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -21,7 +21,7 @@ from vllm.utils import cdiv, has_deep_gemm

 @functools.cache
 def is_deep_gemm_supported() -> bool:
-    """Return ``True`` if DeepGEMM is supported on the current platform.
+    """Return `True` if DeepGEMM is supported on the current platform.
    Currently, only Hopper and Blackwell GPUs are supported.
    """
    is_supported_arch = current_platform.is_cuda() and (
@@ -33,7 +33,7 @@ def is_deep_gemm_supported() -> bool:

 @functools.cache
 def is_deep_gemm_e8m0_used() -> bool:
-    """Return ``True`` if vLLM is configured to use DeepGEMM "
-    "E8M0 scale on a Hopper or Blackwell-class GPU.
+    """Return `True` if vLLM is configured to use DeepGEMM
+    E8M0 scale on a Hopper or Blackwell-class GPU.
    """
    if not is_deep_gemm_supported():
@@ -311,9 +311,9 @@ def calc_diff(x: torch.Tensor, y: torch.Tensor):
    """Return a global difference metric for unit tests.

    DeepGEMM kernels on Blackwell/B200 currently exhibit noticeable per-element
-    error, causing ``torch.testing.assert_close`` to fail.  Instead of checking
+    error, causing `torch.testing.assert_close` to fail.  Instead of checking
    every element, we compute a cosine-style similarity over the whole tensor
-    and report ``1 - sim``.  Once kernel accuracy improves this helper can be
+    and report `1 - sim`.  Once kernel accuracy improves this helper can be
    removed.
    """


--- a/vllm/utils/flashinfer.py
+++ b/vllm/utils/flashinfer.py
@@ -34,7 +34,7 @@ FLASHINFER_CUBINS_REPOSITORY = os.environ.get(

 @functools.cache
 def has_flashinfer() -> bool:
-    """Return ``True`` if FlashInfer is available."""
+    """Return `True` if FlashInfer is available."""
    # Use find_spec to check if the module exists without importing it
    # This avoids potential CUDA initialization side effects
    if importlib.util.find_spec("flashinfer") is None:
@@ -114,13 +114,13 @@ autotune = _lazy_import_wrapper(

 @functools.cache
 def has_flashinfer_comm() -> bool:
-    """Return ``True`` if FlashInfer comm module is available."""
+    """Return `True` if FlashInfer comm module is available."""
    return has_flashinfer() and importlib.util.find_spec("flashinfer.comm") is not None


 @functools.cache
 def has_flashinfer_all2all() -> bool:
-    """Return ``True`` if FlashInfer mnnvl all2all is available."""
+    """Return `True` if FlashInfer mnnvl all2all is available."""
    if not has_flashinfer_comm():
        return False

@@ -141,7 +141,7 @@ def has_flashinfer_all2all() -> bool:

 @functools.cache
 def has_flashinfer_moe() -> bool:
-    """Return ``True`` if FlashInfer MoE module is available."""
+    """Return `True` if FlashInfer MoE module is available."""
    return (
        has_flashinfer()
        and importlib.util.find_spec("flashinfer.fused_moe") is not None
@@ -150,7 +150,7 @@ def has_flashinfer_moe() -> bool:

 @functools.cache
 def has_flashinfer_cutlass_fused_moe() -> bool:
-    """Return ``True`` if FlashInfer CUTLASS fused MoE is available."""
+    """Return `True` if FlashInfer CUTLASS fused MoE is available."""
    if not has_flashinfer_moe():
        return False

@@ -171,7 +171,7 @@ def has_flashinfer_cutlass_fused_moe() -> bool:

 @functools.cache
 def has_nvidia_artifactory() -> bool:
-    """Return ``True`` if NVIDIA's artifactory is accessible.
+    """Return `True` if NVIDIA's artifactory is accessible.

    This checks connectivity to the kernel inference library artifactory
    which is required for downloading certain cubin kernels like TRTLLM FHMA.
@@ -218,9 +218,9 @@ def _force_use_trtllm_attention(env_value: bool | None) -> bool | None:

 def force_use_trtllm_attention() -> bool | None:
    """
-    Return ``None`` if VLLM_USE_TRTLLM_ATTENTION is not set,
-    return ``True`` if TRTLLM attention is forced to be used,
-    return ``False`` if TRTLLM attention is forced to be not used.
+    Return `None` if VLLM_USE_TRTLLM_ATTENTION is not set,
+    return `True` if TRTLLM attention is forced to be used,
+    return `False` if TRTLLM attention is forced to be not used.
    """
    return _force_use_trtllm_attention(envs.VLLM_USE_TRTLLM_ATTENTION)

@@ -244,7 +244,7 @@ def use_trtllm_attention(
    has_sinks: bool = False,
    has_spec: bool = False,
 ) -> bool:
-    """Return ``True`` if TRTLLM attention is used."""
+    """Return `True` if TRTLLM attention is used."""
    force_use_trtllm = force_use_trtllm_attention()

    # Environment variable is set to 0 - respect it

--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -26,17 +26,17 @@ from vllm.v1.kv_cache_interface import (
 from vllm.v1.request import Request

 # BlockHash represents the hash of a single KV-cache block used for
-# prefix caching.  Treating it as a distinct type from ``bytes`` helps
+# prefix caching.  Treating it as a distinct type from `bytes` helps
 # catch accidental misuse when passing around raw byte strings.
 BlockHash = NewType("BlockHash", bytes)

-# ``BlockHashWithGroupId`` combines a ``BlockHash`` with its KV cache group ID.
+# `BlockHashWithGroupId` combines a `BlockHash` with its KV cache group ID.
 # It is represented as raw bytes for compactness and efficiency. The helper
-# functions below pack/unpack the ``BlockHash`` and group id into/from the key.
+# functions below pack/unpack the `BlockHash` and group id into/from the key.
 BlockHashWithGroupId = NewType("BlockHashWithGroupId", bytes)

 # ExternalBlockHash is used for reproducible prefix-cache block hashing.
-# It's a union of ``bytes`` and ``int`` to keep backward compatibility
+# It's a union of `bytes` and `int` to keep backward compatibility
 # after we default block hashing to use sha256 bytes.
 ExternalBlockHash: TypeAlias = bytes | int

@@ -44,7 +44,7 @@ ExternalBlockHash: TypeAlias = bytes | int
 def make_block_hash_with_group_id(
    block_hash: BlockHash, group_id: int
 ) -> BlockHashWithGroupId:
-    """Pack a ``BlockHash`` and group id into a ``BlockHashWithGroupId``.
+    """Pack a `BlockHash` and group id into a `BlockHashWithGroupId`.

    The group id is encoded using 4 bytes in big-endian order and appended to
    the block hash bytes.  This representation avoids creating tuples while
@@ -54,12 +54,12 @@ def make_block_hash_with_group_id(


 def get_block_hash(key: BlockHashWithGroupId) -> BlockHash:
-    """Extract the ``BlockHash`` from a ``BlockHashWithGroupId``."""
+    """Extract the `BlockHash` from a `BlockHashWithGroupId`."""
    return BlockHash(key[:-4])


 def get_group_id(key: BlockHashWithGroupId) -> int:
-    """Extract the group id from a ``BlockHashWithGroupId``."""
+    """Extract the group id from a `BlockHashWithGroupId`."""
    return int.from_bytes(key[-4:], "big", signed=False)



--- a/vllm/v1/worker/cpu_worker.py
+++ b/vllm/v1/worker/cpu_worker.py
@@ -128,7 +128,7 @@ class CPUWorker(Worker):
            "Please try to bind threads manually."
        )

-        # Get CPUs on NUMA node `allowed_numa_nodes[local_rank]``
+        # Get CPUs on NUMA node `allowed_numa_nodes[local_rank]`
        selected_numa_node = allowed_numa_nodes[self.local_rank]  # type: ignore
        logical_cpu_list = [
            x for x in logical_cpu_list if x.numa_node == selected_numa_node

--- a/vllm/v1/worker/tpu_worker.py
+++ b/vllm/v1/worker/tpu_worker.py
@@ -182,8 +182,8 @@ class TPUWorker:
            if isinstance(layer_spec, AttentionSpec):
                dtype = layer_spec.dtype

-                # Use an empty tensor instead of `None`` to force Dynamo to pass
-                # it by reference, rather by specializing on the value ``None``.
+                # Use an empty tensor instead of `None` to force Dynamo to pass
+                # it by reference, rather than by specializing on the value `None`.
                tpu_kv_cache = torch.tensor([], dtype=dtype).to(self.device)
                kv_caches[layer_name] = tpu_kv_cache
            else: