[Feature] Add FIPS 140-3 compliant hash algorithm option for multimodal hashing (#32386)

Signed-off-by: Karan Bansal <karanb192@gmail.com>

[Feature] Add FIPS 140-3 compliant hash algorithm option for multimodal hashing (#32386)
Signed-off-by: Karan Bansal <karanb192@gmail.com>
3055232b · Karan Bansal · GitHub · 965765ae · 3055232b · 3055232b
Unverified Commit 3055232b authored Jan 18, 2026 by Karan Bansal Committed by GitHub Jan 18, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 3 deletions

vllm/envs.py vllm/envs.py +12 -0

vllm/multimodal/hasher.py vllm/multimodal/hasher.py +34 -3

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -73,6 +73,7 @@ if TYPE_CHECKING:
    VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
    VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
    VLLM_MEDIA_CONNECTOR: str = "http"
+    VLLM_MM_HASHER_ALGORITHM: str = "blake3"
    VLLM_TARGET_DEVICE: str = "cuda"
    VLLM_MAIN_CUDA_VERSION: str = "12.9"
    VLLM_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
@@ -806,6 +807,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # imported at runtime.
    # If a non-existing backend is used, an AssertionError will be thrown.
    "VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
+    # Hash algorithm for multimodal content hashing.
+    # - "blake3": Default, fast cryptographic hash (not FIPS 140-3 compliant)
+    # - "sha256": FIPS 140-3 compliant, widely supported
+    # - "sha512": FIPS 140-3 compliant, can be faster than sha256 on 64-bit systems
+    # Use sha256 or sha512 for FIPS compliance in government/enterprise deployments
+    "VLLM_MM_HASHER_ALGORITHM": env_with_choices(
+        "VLLM_MM_HASHER_ALGORITHM",
+        "blake3",
+        ["blake3", "sha256", "sha512"],
+        case_sensitive=False,
+    ),
    # Path to the XLA persistent cache directory.
    # Only used for XLA devices such as TPUs.
    "VLLM_XLA_CACHE_PATH": lambda: os.path.expanduser(

--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
+import hashlib
 import pickle
 import uuid
-from collections.abc import Iterable
+from collections.abc import Callable, Iterable
 import numpy as np
 import torch
-from blake3 import blake3
 from PIL import Image
+import vllm.envs as envs
 from vllm.logger import init_logger
 from .media import MediaWithBytes
@@ -17,6 +19,34 @@ from .media import MediaWithBytes
 logger = init_logger(__name__)
+@functools.lru_cache(maxsize=3)
+def _get_hasher_factory(algorithm: str) -> Callable[[], "hashlib._Hash"]:
+    """
+    Get the hasher factory based on the configured algorithm.
+    Args:
+        algorithm: Hash algorithm name (blake3, sha256, or sha512)
+    Returns:
+        A callable that creates a new hasher instance.
+    blake3 is the default; sha256 and sha512 are offered for FIPS compliance.
+    See: https://github.com/vllm-project/vllm/issues/18334
+    """
+    algorithm = algorithm.lower()
+    if algorithm == "blake3":
+        from blake3 import blake3
+        return blake3
+    elif algorithm == "sha256":
+        return hashlib.sha256
+    elif algorithm == "sha512":
+        return hashlib.sha512
+    else:
+        # This should never happen due to env_with_choices validation
+        raise ValueError(f"Unsupported hash algorithm: {algorithm}")
 class MultiModalHasher:
    @classmethod
    def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
@@ -114,7 +144,8 @@ class MultiModalHasher:
    @classmethod
    def hash_kwargs(cls, **kwargs: object) -> str:
-        hasher = blake3()
+        hasher_factory = _get_hasher_factory(envs.VLLM_MM_HASHER_ALGORITHM)
+        hasher = hasher_factory()
        for k, v in kwargs.items():
            for bytes_ in cls.iter_item_to_bytes(k, v):