[Docs] Replace `rst` style double-backtick with `md` single-backtick (#27091)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

[Docs] Replace `rst` style double-backtick with `md` single-backtick (#27091)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
6c9fdbf7 · Harry Mellor · GitHub · 483ea646 · 6c9fdbf7 · 6c9fdbf7
Unverified commit 6c9fdbf7, authored Oct 17, 2025 by Harry Mellor; committed by GitHub on Oct 17, 2025
20 changed files
--- a/benchmarks/multi_turn/benchmark_serving_multi_turn.py
+++ b/benchmarks/multi_turn/benchmark_serving_multi_turn.py
@@ -1251,7 +1251,7 @@ async def main() -> None:
        default=None,
        help="The model name used in the API. "
        "If not specified, the model name will be the "
-        "same as the ``--model`` argument. ",
+        "same as the `--model` argument. ",
    )

    parser.add_argument(

--- a/docs/models/extensions/fastsafetensor.md
+++ b/docs/models/extensions/fastsafetensor.md
@@ -3,4 +3,4 @@ Loading Model weights with fastsafetensors

 Using fastsafetensors library enables loading model weights to GPU memory by leveraging GPU direct storage. See [their GitHub repository](https://github.com/foundation-model-stack/fastsafetensors) for more details.

-To enable this feature, use the ``--load-format fastsafetensors`` command-line argument
+To enable this feature, use the `--load-format fastsafetensors` command-line argument
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -67,17 +67,17 @@ class _HfExamplesInfo:

    is_available_online: bool = True
    """
-    Set this to ``False`` if the name of this architecture no longer exists on
+    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
-    """The ``trust_remote_code`` level required to load the model."""
+    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
-    """The ``hf_overrides`` required to load the model."""
+    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """

--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -162,7 +162,7 @@ def check_logprobs_close(

            # Test prompt logprobs closeness
            if prompt_logprobs_0 is not None and prompt_logprobs_1 is not None:
-                # Both sequences' prompt logprobs lists are not `None``
+                # Both sequences' prompt logprobs lists are not `None`
                # (although individual list elements may be `None`);
                # for each token's logprobs:
                for idx, (logprobs_elem_0, logprobs_elem_1) in enumerate(

--- a/tools/check_init_lazy_imports.py
+++ b/tools/check_init_lazy_imports.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Ensure we perform lazy loading in vllm/__init__.py.
-i.e: appears only within the ``if typing.TYPE_CHECKING:`` guard,
+i.e: appears only within the `if typing.TYPE_CHECKING:` guard,
 **except** for a short whitelist.
 """


--- a/vllm/assets/base.py
+++ b/vllm/assets/base.py
@@ -21,7 +21,7 @@ def get_cache_dir() -> Path:
 @lru_cache
 def get_vllm_public_assets(filename: str, s3_prefix: str | None = None) -> Path:
    """
-    Download an asset file from ``s3://vllm-public-assets``
+    Download an asset file from `s3://vllm-public-assets`
    and return the path to the downloaded file.
    """
    asset_directory = get_cache_dir() / "vllm_public_assets"

--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -1231,7 +1231,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
        default=None,
        help="The model name used in the API. "
        "If not specified, the model name will be the "
-        "same as the ``--model`` argument. ",
+        "same as the `--model` argument. ",
    )

    parser.add_argument(

--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -138,8 +138,8 @@ def support_torch_compile(
    """

    def cls_decorator_helper(cls: _T) -> _T:
-        # helper to pass `dynamic_arg_dims`` to `_support_torch_compile``
-        # to avoid too much indentation for `_support_torch_compile``
+        # helper to pass `dynamic_arg_dims` to `_support_torch_compile`
+        # to avoid too much indentation for `_support_torch_compile`
        if not hasattr(cls, "forward"):
            raise TypeError("decorated class should have a forward method.")
        sig = inspect.signature(cls.forward)

--- a/vllm/config/pooler.py
+++ b/vllm/config/pooler.py
@@ -66,15 +66,15 @@ class PoolerConfig:
    """
    step_tag_id: int | None = None
    """
-    If set, only the score corresponding to the ``step_tag_id`` in the
+    If set, only the score corresponding to the `step_tag_id` in the
    generated sentence should be returned. Otherwise, the scores for all tokens
    are returned.
    """
    returned_token_ids: list[int] | None = None
    """
    A list of indices for the vocabulary dimensions to be extracted,
-    such as the token IDs of ``good_token`` and ``bad_token`` in the
-    ``math-shepherd-mistral-7b-prm`` model.
+    such as the token IDs of `good_token` and `bad_token` in the
+    `math-shepherd-mistral-7b-prm` model.
    """

    def compute_hash(self) -> str:

--- a/vllm/distributed/kv_events.py
+++ b/vllm/distributed/kv_events.py
@@ -117,7 +117,7 @@ class ZmqEventPublisher(EventPublisher):
    Parameters
    ----------
    endpoint:
-        PUB address. Use ``tcp://*:5557`` to bind or ``tcp://host:5557`` to
+        PUB address. Use `tcp://*:5557` to bind or `tcp://host:5557` to
        connect.
    replay_endpoint:
        Optional ROUTER address for replay requests. When given, subscribers can

--- a/vllm/entrypoints/context.py
+++ b/vllm/entrypoints/context.py
@@ -515,7 +515,7 @@ class StreamingHarmonyContext(HarmonyContext):

    def render_for_completion(self) -> list[int]:
        # now this list of tokens as next turn's starting tokens
-        # `<|start|>assistant``,
+        # `<|start|>assistant`,
        # we need to process them in parser.
        rendered_tokens = super().render_for_completion()


--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1504,7 +1504,7 @@ class LLM:
        """Return a snapshot of aggregated metrics from Prometheus.

        Returns:
-            A ``MetricSnapshot`` instance capturing the current state
+            A `MetricSnapshot` instance capturing the current state
            of all aggregated metrics from Prometheus.

        Note:

--- a/vllm/entrypoints/renderer.py
+++ b/vllm/entrypoints/renderer.py
@@ -26,12 +26,12 @@ class RenderConfig:

    max_length: int | None = None
    """Maximum allowable total input token length. If provided,
-    token inputs longer than this raise ``ValueError``."""
+    token inputs longer than this raise `ValueError`."""

    truncate_prompt_tokens: int | None = None
-    """Number of tokens to keep. ``None`` means no truncation.
-    ``0`` yields an empty list (and skips embeds).
-    ``-1`` maps to ``model_config.max_model_len``."""
+    """Number of tokens to keep. `None` means no truncation.
+    `0` yields an empty list (and skips embeds).
+    `-1` maps to `model_config.max_model_len`."""

    add_special_tokens: bool | None = True
    """Whether to add model-specific special tokens during tokenization."""
@@ -107,10 +107,10 @@ class BaseRenderer(ABC):

        Args:
            prompt_or_prompts: One of:
-                - ``str``: Single text prompt.
-                - ``list[str]``: Batch of text prompts.
-                - ``list[int]``: Single pre-tokenized sequence.
-                - ``list[list[int]]``: Batch of pre-tokenized sequences.
+                - `str`: Single text prompt.
+                - `list[str]`: Batch of text prompts.
+                - `list[int]`: Single pre-tokenized sequence.
+                - `list[list[int]]`: Batch of pre-tokenized sequences.
            config: Render configuration controlling how prompts are prepared
                (e.g., tokenization and length handling).

@@ -134,9 +134,9 @@ class BaseRenderer(ABC):
        Convert text/token and/or base64-encoded embeddings inputs into
        engine-ready prompt objects using a unified RenderConfig.

-        At least one of ``prompt_or_prompts`` or ``prompt_embeds`` must be
+        At least one of `prompt_or_prompts` or `prompt_embeds` must be
        provided and non-empty. If both are omitted or empty (e.g., empty
-        string and empty list), a ``ValueError`` is raised.
+        string and empty list), a `ValueError` is raised.

        Args:
            prompt_or_prompts: Text or token inputs to include.
@@ -150,7 +150,7 @@ class BaseRenderer(ABC):
                Engine-ready prompt objects.

        Raises:
-            ValueError: If both ``prompt_or_prompts`` and ``prompt_embeds``
+            ValueError: If both `prompt_or_prompts` and `prompt_embeds`
                are omitted or empty (decoder prompt cannot be empty), or if
                length limits are exceeded.
        """

--- a/vllm/inputs/data.py
+++ b/vllm/inputs/data.py
@@ -327,7 +327,7 @@ def zip_enc_dec_prompts(
    [`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
    instances.

-    ``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
+    `mm_processor_kwargs` may also be provided; if a dict is passed, the same
    dictionary will be used for every encoder/decoder prompt. If an iterable is
    provided, it will be zipped with the encoder/decoder prompts.
    """

--- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -27,7 +27,7 @@ __all__ = [


 def is_flashinfer_fp4_cutlass_moe_available() -> bool:
-    """Return ``True`` when FlashInfer CUTLASS NV-FP4 kernels can be used."""
+    """Return `True` when FlashInfer CUTLASS NV-FP4 kernels can be used."""
    return (
        envs.VLLM_USE_FLASHINFER_MOE_FP4
        and has_flashinfer_cutlass_fused_moe()

--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -887,11 +887,11 @@ def requant_weight_ue8m0_inplace(
    UE8M0 (power-of-two) format expected by the new DeepGEMM kernels inplace.

    Args:
-        weight: Block-quantised weight tensor stored in ``torch.float8_e4m3fn``.
-            Expected shape ``(..., M, K)``.
-        weight_scale: Corresponding per-block scale tensor (``torch.float32``)
-            with shape ``(..., M // block_size[0], K // block_size[1])``.
-        block_size: 2-element iterable ``[block_m, block_k]`` describing the
+        weight: Block-quantised weight tensor stored in `torch.float8_e4m3fn`.
+            Expected shape `(..., M, K)`.
+        weight_scale: Corresponding per-block scale tensor (`torch.float32`)
+            with shape `(..., M // block_size[0], K // block_size[1])`.
+        block_size: 2-element iterable `[block_m, block_k]` describing the
            block quantisation granularity.
    """
    if weight.numel() == 0:

--- a/vllm/model_executor/models/olmo.py
+++ b/vllm/model_executor/models/olmo.py
@@ -64,7 +64,7 @@ from .utils import (
 class OlmoAttention(nn.Module):
    """
    This is the attention block where the output is computed as
-    ``Attention(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))``
+    `Attention(LN(x))` in `MLP(LN(x + Attention(LN(x))))`
    (plus another skip connection).
    """

@@ -144,7 +144,7 @@ class OlmoAttention(nn.Module):
 class OlmoMLP(nn.Module):
    """
    This is the MLP block where the output is computed as
-    ``MLP(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))``
+    `MLP(LN(x))` in `MLP(LN(x + Attention(LN(x))))`
    (plus another skip connection).
    """

@@ -193,7 +193,7 @@ class OlmoMLP(nn.Module):
 class OlmoDecoderLayer(nn.Module):
    """
    This is a typical transformer block where the output is
-    computed as ``MLP(LN(x + Attention(LN(x))))``
+    computed as `MLP(LN(x + Attention(LN(x))))`
    (plus another skip connection).
    """


--- a/vllm/model_executor/models/olmo2.py
+++ b/vllm/model_executor/models/olmo2.py
@@ -69,7 +69,7 @@ from vllm.transformers_utils.configs import Olmo3Config
 class Olmo2Attention(nn.Module):
    """
    This is the attention block where the output is computed as
-    ``Attention(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))``
+    `Attention(LN(x))` in `MLP(LN(x + Attention(LN(x))))`
    (plus another skip connection).
    """

@@ -190,7 +190,7 @@ class Olmo2Attention(nn.Module):
 class Olmo2MLP(nn.Module):
    """
    This is the MLP block where the output is computed as
-    ``MLP(x)`` in ``LN(MLP(x + LN(Attention(x))))``
+    `MLP(x)` in `LN(MLP(x + LN(Attention(x))))`
    (plus another skip connection).
    """

@@ -235,7 +235,7 @@ class Olmo2MLP(nn.Module):
 class Olmo2DecoderLayer(nn.Module):
    """
    This is a typical transformer block where the output is
-    computed as ``MLP(LN(x + Attention(LN(x))))``
+    computed as `MLP(LN(x + Attention(LN(x))))`
    (plus another skip connection).
    """


--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -166,7 +166,7 @@ class VisualTokenizer(torch.nn.Module):
        # e.g., for hidden_stride=2, this leads to a token length reduction:
        # 1024 -> 256 for aimv2
        if self.config.hidden_stride > 1:
-            # this `d` maybe different from the above `d``
+            # this `d` maybe different from the above `d`
            n, L, d = features.shape
            sqrt_l = int(L**0.5)
            assert sqrt_l**2 == L, (

--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -99,13 +99,13 @@ class AutoWeightsLoader:
    the weights only once.

    The weight loading logic for individual modules can be overridden
-    by defining a ``load_weights`` method.
+    by defining a `load_weights` method.

    Similarly, the weight loading logic for individual parameters can be
-    overridden by defining a ``weight_loader`` method.
+    overridden by defining a `weight_loader` method.

    Detailed weight loading information can be viewed by setting the
-    environment variable ``VLLM_LOGGING_LEVEL=DEBUG``.
+    environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
    """

    # Models trained using early version ColossalAI
@@ -372,9 +372,9 @@ def flatten_bn(
    concat: bool = False,
 ) -> list[torch.Tensor] | torch.Tensor:
    """
-    Flatten the ``B`` and ``N`` dimensions of batched multimodal inputs.
+    Flatten the `B` and `N` dimensions of batched multimodal inputs.

-    The input tensor should have shape ``(B, N, ...)```.
+    The input tensor should have shape `(B, N, ...)`.
    """
    if isinstance(x, torch.Tensor):
        return x.flatten(0, 1)
@@ -424,12 +424,12 @@ def _merge_multimodal_embeddings(
    is_multimodal: torch.Tensor,
 ) -> torch.Tensor:
    """
-    Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the
-    positions in ``inputs_embeds`` corresponding to placeholder tokens in
-    ``input_ids``.
+    Merge `multimodal_embeddings` into `inputs_embeds` by overwriting the
+    positions in `inputs_embeds` corresponding to placeholder tokens in
+    `input_ids`.

    Note:
-        This updates ``inputs_embeds`` in place.
+        This updates `inputs_embeds` in place.
    """
    if len(multimodal_embeddings) == 0:
        return inputs_embeds
@@ -475,14 +475,14 @@ def merge_multimodal_embeddings(
    placeholder_token_id: int | list[int],
 ) -> torch.Tensor:
    """
-    Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the
-    positions in ``inputs_embeds`` corresponding to placeholder tokens in
-    ``input_ids``.
+    Merge `multimodal_embeddings` into `inputs_embeds` by overwriting the
+    positions in `inputs_embeds` corresponding to placeholder tokens in
+    `input_ids`.

-    ``placeholder_token_id`` can be a list of token ids (e.g, token ids
+    `placeholder_token_id` can be a list of token ids (e.g, token ids
    of img_start, img_break, and img_end tokens) when needed: This means
-    the order of these tokens in the ``input_ids`` MUST MATCH the order of
-    their embeddings in ``multimodal_embeddings`` since we need to
+    the order of these tokens in the `input_ids` MUST MATCH the order of
+    their embeddings in `multimodal_embeddings` since we need to
    slice-merge instead of individually scattering.

    For example, if input_ids is "TTTTTSIIIBIIIBIIIETTT", where
@@ -497,7 +497,7 @@ def merge_multimodal_embeddings(
    input_ids for a correct embedding merge.

    Note:
-        This updates ``inputs_embeds`` in place.
+        This updates `inputs_embeds` in place.
    """
    if isinstance(placeholder_token_id, list):
        is_multimodal = isin_list(input_ids, placeholder_token_id)