f"but you explicitly specified '{self.attention_backend}'."
)
# NOTE(review): this span arrived whitespace-stripped and with duplicated
# old/new lines from what looks like an unresolved merge. The code below is the
# de-duplicated reconstruction, keeping the newer variants that reference
# RADIX_SUPPORTED_DETERMINISTIC_ATTENTION_BACKEND — confirm against upstream.
# Deterministic inference with DeepSeek models is only supported on a subset
# of attention backends; fail fast with an actionable error otherwise.
if is_deepseek_model:
    # Use the same constant the error message reports, so the check and the
    # message cannot drift apart (the old line hard-coded ["fa3", "triton"]).
    if self.attention_backend not in RADIX_SUPPORTED_DETERMINISTIC_ATTENTION_BACKEND:
        raise ValueError(
            f"Currently only {RADIX_SUPPORTED_DETERMINISTIC_ATTENTION_BACKEND} attention backends are supported for deterministic inference with DeepSeek models. But you're using {self.attention_backend}."
        )

# Currently, only certain backends support radix cache. Support for other backends is in progress
# NOTE(review): the fragment shows this disable as unconditional at this point;
# if an enclosing backend check guarded it before the merge, restore that guard.
self.disable_radix_cache = True
logger.warning(
    f"Currently radix cache is not compatible with {self.attention_backend} attention backend for deterministic inference. It will be supported in the future."
)