# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import torch ToleranceSpec = dict[torch.dtype, dict[str, float]] # Default tolerances for comparing IR op implementations against native. # These are intentionally conservative (permissive) to avoid false failures # across different hardware and kernel implementations. Ops that need tighter # or looser bounds should use override_tolerance. DEFAULT_TOLERANCES: ToleranceSpec = { # 52-bit mantissa; machine epsilon ~1.1e-16 torch.float64: {"atol": 1e-8, "rtol": 1e-8}, # 23-bit mantissa; machine epsilon ~1.2e-7. # Values from PyTorch test_transformers.py reference defaults. torch.float32: {"atol": 1e-5, "rtol": 1.3e-6}, # 10-bit mantissa; machine epsilon ~9.8e-4. # Standard tolerance used across vLLM kernel tests. torch.float16: {"atol": 1e-3, "rtol": 1e-3}, # 7-bit mantissa; machine epsilon ~7.8e-3. # Wider rtol than float16 to account for the coarser mantissa. torch.bfloat16: {"atol": 1e-3, "rtol": 1.6e-2}, # 3-bit mantissa; machine epsilon ~6.25e-2. # Derived from vLLM fp8 kernel tests (merge_attn_states, silu_mul_fp8). torch.float8_e4m3fn: {"atol": 1e-1, "rtol": 1e-1}, # 2-bit mantissa; machine epsilon ~1.25e-1. # Wider than e4m3fn due to the smaller mantissa. torch.float8_e5m2: {"atol": 2e-1, "rtol": 2e-1}, # 1-bit mantissa; machine epsilon ~2.5e-1. Packed pair format (x2). # Derived from vLLM fp4 tests (test_silu_mul_nvfp4_quant: atol=3e-1). torch.float4_e2m1fn_x2: {"atol": 3e-1, "rtol": 3e-1}, # Integer quantized; off-by-one from rounding is expected. # rtol=0 because relative error is meaningless for small integers. torch.int8: {"atol": 1, "rtol": 0}, }