"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "a1fad8286f86c46821f8038d86e358e9cc62d20f"
Unverified Commit aa1c5cf5 authored by Baizhou Zhang's avatar Baizhou Zhang Committed by GitHub
Browse files

Add warnings and remove dependency for deterministic inference (#10724)


Co-authored-by: default avatarYineng Zhang <me@zhyncs.com>
parent 592caab6
...@@ -981,29 +981,36 @@ class ServerArgs: ...@@ -981,29 +981,36 @@ class ServerArgs:
def _handle_deterministic_inference(self):
    """Validate and adjust server settings required for deterministic inference.

    When ``enable_deterministic_inference`` is set, this forces the pytorch
    sampling backend, checks that the attention backend is one of the
    supported deterministic backends, disables the radix cache for backends
    other than fa3, and rejects tensor-parallel sizes above 1. A warning is
    emitted because only dense models have been tested.

    Raises:
        ValueError: if the attention backend is unsupported, or if tp_size > 1.
    """
    if not self.enable_deterministic_inference:
        return

    # Deterministic inference requires the pytorch sampling backend.
    self.sampling_backend = "pytorch"
    logger.warning(
        "Sampling backend is set to pytorch for deterministic inference."
    )

    # Only a restricted set of attention backends supports determinism.
    if self.attention_backend not in DETERMINISTIC_ATTENTION_BACKEND_CHOICES:
        raise ValueError(
            f"Currently only {DETERMINISTIC_ATTENTION_BACKEND_CHOICES} attention backends are supported for deterministic inference."
        )

    # Currently, only FA3 supports radix cache. Support for other backends is in progress
    if self.attention_backend != "fa3":
        self.disable_radix_cache = True
        logger.warning(
            f"Currently radix cache is not compatible with {self.attention_backend} attention backend for deterministic inference. It will be supported in the future."
        )

    # Tensor parallelism beyond a single rank is not yet deterministic.
    if self.tp_size > 1:
        raise ValueError(
            "Currently only TP size 1 is supported for deterministic inference."
        )

    # Determinism has only been validated on dense models so far.
    logger.warning(
        "Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
    )
def _handle_other_validations(self): def _handle_other_validations(self):
pass pass
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment