Unverified commit 781ddf78, authored by Luka Govedič and committed by GitHub
Browse files

[CI][torch.compile] Fix incorrect filtering for E2E fusion tests on B200 (#34031)


Signed-off-by: Luka Govedič <lgovedic@redhat.com>
parent 64a9c252
...@@ -121,13 +121,10 @@ steps: ...@@ -121,13 +121,10 @@ steps:
optional: true optional: true
commands: commands:
- nvidia-smi - nvidia-smi
# Run all models and attn backends but only Inductor partition and native custom ops # Run all models but only FLASHINFER, Inductor partition and native custom ops
# -k "inductor_partition and not +rms_norm and not +quant_fp8"
# Qwen requires +quant_fp8 as -quant_fp8 rms+quant fusion is not supported # Qwen requires +quant_fp8 as -quant_fp8 rms+quant fusion is not supported
# -k "inductor_partition and not +rms_norm and +quant_fp8 and qwen3" # Run just llama3 (fp8 & fp4) for all config combinations (only inductor partition)
# Run just llama3 (fp8 & fp4) for all config combinations - pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "inductor_partition and (FLASHINFER and not +rms_norm and (not +quant_fp8 or +quant_fp8 and qwen3) or llama-3)"
# -k "llama-3"
- pytest -v -s tests/compile/fusions_e2e/test_tp1_quant.py -k "inductor_partition and not +rms_norm and not +quant_fp8" -k "inductor_partition and not +rms_norm and +quant_fp8 and qwen3" -k "llama-3"
- label: Fusion E2E TP2 Quick (H100) - label: Fusion E2E TP2 Quick (H100)
timeout_in_minutes: 20 timeout_in_minutes: 20
...@@ -162,7 +159,7 @@ steps: ...@@ -162,7 +159,7 @@ steps:
- tests/compile/fusions_e2e/ - tests/compile/fusions_e2e/
commands: commands:
- nvidia-smi - nvidia-smi
# Run just llama3 (fp4 & fp8 & bf16) for all config combinations # Run just llama3 (fp8 & bf16) for all config combinations
- pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py -k "llama-3" - pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py -k "llama-3"
- label: Fusion E2E TP2 AsyncTP Config Sweep (H100) - label: Fusion E2E TP2 AsyncTP Config Sweep (H100)
...@@ -197,7 +194,8 @@ steps: ...@@ -197,7 +194,8 @@ steps:
- tests/compile/fusions_e2e/ - tests/compile/fusions_e2e/
commands: commands:
- nvidia-smi - nvidia-smi
# Run all models and attn backends but only Inductor partition and native custom ops # Run all models but only FLASHINFER, Inductor partition and native custom ops
# include qwen with +quant_fp8 as -quant_fp8 rms+quant fusion is not supported
# for ar-rms-quant-fp4, also sweep llama3 # for ar-rms-quant-fp4, also sweep llama3
- pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py -k "inductor_partition and not +rms_norm and not +quant_fp8" -k "Llama-3.1-8B-Instruct-FP4" - pytest -v -s tests/compile/fusions_e2e/test_tp2_ar_rms.py -k "(FLASHINFER and inductor_partition and not +rms_norm and (not +quant_fp8 or +quant_fp8 and qwen3)) or Llama-3.1-8B-Instruct-FP4"
- pytest -v -s tests/compile/fusions_e2e/test_tp2_async_tp.py -k "inductor_partition and not +rms_norm and not +quant_fp8" - pytest -v -s tests/compile/fusions_e2e/test_tp2_async_tp.py -k "FLASHINFER and inductor_partition and not +rms_norm and (not +quant_fp8 or +quant_fp8 and qwen3)"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment