Commit 36942660 (unverified), authored by Hubert Lu, committed by GitHub
Browse files

Expand and update test coverage for AMD CI (#10044)

parent 9f5e7018
...@@ -25,13 +25,117 @@ concurrency: ...@@ -25,13 +25,117 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
# check-changes: gate job whose two outputs (main_package, sgl_kernel) are read by
# later jobs through needs.check-changes.outputs.*.
# NOTE(review): the next two lines appear to fuse the removed (left) and added
# (right) columns of a side-by-side diff; the right-hand text belongs to this job.
accuracy-test-1-gpu-amd: check-changes:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') runs-on: ubuntu-latest
# Both outputs are forwarded from the step with id `filter` below.
outputs:
main_package: ${{ steps.filter.outputs.main_package }}
sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
steps:
- name: Checkout code
uses: actions/checkout@v4
# On pull_request events without the 'run-ci' label this step exits 1.
- name: Fail if the PR does not have the 'run-ci' label
if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
run: |
echo "This pull request does not have the 'run-ci' label. Failing the workflow."
exit 1
# On pull_request events whose PR is a draft this step exits 1.
- name: Fail if the PR is a draft
if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
run: |
echo "This pull request is a draft. Failing the workflow."
exit 1
# Path filters: main_package covers python/**, scripts/ci/**, test/** and the
# pr-test-amd.yml workflow file; sgl_kernel covers sgl-kernel/**.
- name: Detect file changes
id: filter
uses: dorny/paths-filter@v3
with:
filters: |
main_package:
- "python/**"
- "scripts/ci/**"
- "test/**"
- ".github/workflows/pr-test-amd.yml"
sgl_kernel:
- "sgl-kernel/**"
# =============================================== sgl-kernel ====================================================
# sgl-kernel-unit-test-amd: runs a fixed list of sgl-kernel pytest files inside the
# ci_sglang container after check-changes has finished.
sgl-kernel-unit-test-amd:
needs: [check-changes]
# Only runs when the sgl_kernel path filter of check-changes reported 'true'.
if: needs.check-changes.outputs.sgl_kernel == 'true'
# NOTE(review): the next five lines show the same text twice, apparently the
# unchanged left and right columns of a side-by-side diff.
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
runner: [linux-mi300-gpu-1] runner: [linux-mi300-gpu-1]
runs-on: ${{matrix.runner}} runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
# The container start script receives the workspace path through GITHUB_WORKSPACE.
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: |
bash scripts/ci/amd_ci_install_dependency.sh
# Each test file gets its own `docker exec ... python3 -m pytest` call; the whole
# step is capped at 14 minutes.
- name: Run test
timeout-minutes: 14
run: |
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
# =============================================== primary ====================================================
# unit-test-frontend-amd: runs the `per-commit` suite from test/lang inside the
# ci_sglang container on the linux-mi300-gpu-1 runner.
unit-test-frontend-amd:
  needs:
    - check-changes
  # Run when no upstream job failed or was cancelled and at least one of the
  # two check-changes path filters reported 'true'.
  if: >-
    always() && !failure() && !cancelled() &&
    ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
  strategy:
    fail-fast: false
    matrix:
      runner:
        - linux-mi300-gpu-1
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Ensure VRAM is clear
      run: bash scripts/ensure_vram_clear.sh rocm

    # The container start script receives the workspace path through
    # GITHUB_WORKSPACE.
    - name: Start CI container
      run: bash scripts/ci/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}

    - name: Install dependencies
      run: bash scripts/ci/amd_ci_install_dependency.sh

    # The suite itself is capped at 10 minutes.
    - name: Run test
      timeout-minutes: 10
      run: docker exec -w /sglang-checkout/test/lang ci_sglang python3 run_suite.py --suite per-commit
unit-test-backend-1-gpu-amd:
needs: [check-changes, unit-test-frontend-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
runs-on: ${{matrix.runner}}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
...@@ -47,19 +151,20 @@ jobs: ...@@ -47,19 +151,20 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Evaluate Accuracy - name: Run test
timeout-minutes: 30 timeout-minutes: 30
run: | run: |
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12
bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
accuracy-test-2-gpu-amd: unit-test-backend-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
runner: [linux-mi300-gpu-2] runner: [linux-mi300-gpu-2]
part: [0, 1]
runs-on: ${{matrix.runner}} runs-on: ${{matrix.runner}}
steps: steps:
- name: Checkout code - name: Checkout code
...@@ -76,17 +181,20 @@ jobs: ...@@ -76,17 +181,20 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Evaluate accuracy (TP=2) - name: Run test
timeout-minutes: 60 timeout-minutes: 30
run: | run: |
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
mla-test-1-gpu-amd: unit-test-backend-8-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes, unit-test-backend-2-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
runner: [linux-mi300-gpu-1] runner: [linux-mi300-gpu-8]
part: [0, 1, 2]
runs-on: ${{matrix.runner}} runs-on: ${{matrix.runner}}
steps: steps:
- name: Checkout code - name: Checkout code
...@@ -103,13 +211,15 @@ jobs: ...@@ -103,13 +211,15 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh run: bash scripts/ci/amd_ci_install_dependency.sh
- name: MLA TEST - name: Run test
timeout-minutes: 30 timeout-minutes: 60
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 test_mla.py bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600
performance-test-1-gpu-part-1-amd: performance-test-1-gpu-part-1-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
...@@ -152,7 +262,9 @@ jobs: ...@@ -152,7 +262,9 @@ jobs:
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
performance-test-1-gpu-part-2-amd: performance-test-1-gpu-part-2-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
...@@ -188,8 +300,10 @@ jobs: ...@@ -188,8 +300,10 @@ jobs:
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8 bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
bench-test-2-gpu-amd: performance-test-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes, unit-test-backend-2-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
...@@ -235,13 +349,14 @@ jobs: ...@@ -235,13 +349,14 @@ jobs:
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
unit-test-backend-1-gpu-amd: accuracy-test-1-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
runner: [linux-mi300-gpu-1] runner: [linux-mi300-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
runs-on: ${{matrix.runner}} runs-on: ${{matrix.runner}}
steps: steps:
- name: Checkout code - name: Checkout code
...@@ -258,13 +373,17 @@ jobs: ...@@ -258,13 +373,17 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test - name: Evaluate Accuracy
timeout-minutes: 30 timeout-minutes: 30
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 12 bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
unit-test-backend-2-gpu-amd: accuracy-test-2-gpu-amd:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs: [check-changes, accuracy-test-1-gpu-amd]
if: always() && !failure() && !cancelled() &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
...@@ -285,68 +404,53 @@ jobs: ...@@ -285,68 +404,53 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test - name: Evaluate accuracy (TP=2)
timeout-minutes: 40 timeout-minutes: 30
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
# pr-test-amd-finish (right-hand text): summary job that lists every other job in
# `needs`, runs with `if: always()` on ubuntu-latest, and has a single step. That
# step serialises the `needs` context with toJson, reads the job names with jq,
# prints "<job>: <result>" for each, and exits 1 as soon as a result is "failure"
# or "cancelled"; any other result (e.g. skipped) lets the loop continue, and the
# step exits 0 at the end.
# NOTE(review): the left-hand text on these lines appears to be the removed
# unit-test-backend-8-gpu-amd and unit-test-sgl-kernel-amd jobs from the old side
# of a side-by-side diff; the two columns are fused on each line.
unit-test-backend-8-gpu-amd: pr-test-amd-finish:
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') needs:
strategy: [
fail-fast: false check-changes,
matrix:
runner: [linux-mi300-gpu-8] sgl-kernel-unit-test-amd,
part: [0, 1]
runs-on: ${{matrix.runner}} unit-test-frontend-amd,
unit-test-backend-1-gpu-amd,
unit-test-backend-2-gpu-amd,
unit-test-backend-8-gpu-amd,
performance-test-1-gpu-part-1-amd,
performance-test-1-gpu-part-2-amd,
performance-test-2-gpu-amd,
accuracy-test-1-gpu-amd,
accuracy-test-2-gpu-amd,
]
if: always()
runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Check all dependent job statuses
uses: actions/checkout@v4
- name: Ensure VRAM is clear
run: bash scripts/ensure_vram_clear.sh rocm
- name: Start CI container
run: bash scripts/ci/amd_ci_start_container.sh
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies
run: bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 60
run: | run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 3600 # Convert the 'needs' context to a JSON string
json_needs='${{ toJson(needs) }}'
unit-test-sgl-kernel-amd: # Get a list of all job names from the JSON keys
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
strategy:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Ensure VRAM is clear for job in $job_names; do
run: bash scripts/ensure_vram_clear.sh rocm # For each job, extract its result
result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
- name: Start CI container # Print the job name and its result
run: bash scripts/ci/amd_ci_start_container.sh echo "$job: $result"
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
- name: Install dependencies # Check for failure or cancellation and exit if found
run: | if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
bash scripts/ci/amd_ci_install_dependency.sh echo "The above jobs failed."
exit 1
fi
done
- name: Run test # If the loop completes, all jobs were successful
timeout-minutes: 14 echo "All jobs completed successfully"
run: | exit 0
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
import time
import unittest import unittest
from types import SimpleNamespace from types import SimpleNamespace
...@@ -39,6 +40,7 @@ class TestHiCache(CustomTestCase): ...@@ -39,6 +40,7 @@ class TestHiCache(CustomTestCase):
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
kill_process_tree(cls.process.pid) kill_process_tree(cls.process.pid)
time.sleep(5)
def test_mmlu(self): def test_mmlu(self):
args = SimpleNamespace( args = SimpleNamespace(
......
...@@ -8,7 +8,7 @@ import sys ...@@ -8,7 +8,7 @@ import sys
import unittest import unittest
from types import SimpleNamespace from types import SimpleNamespace
from sglang.srt.utils import kill_process_tree from sglang.srt.utils import is_hip, kill_process_tree
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
...@@ -17,15 +17,16 @@ from sglang.test.test_utils import ( ...@@ -17,15 +17,16 @@ from sglang.test.test_utils import (
popen_launch_server, popen_launch_server,
) )
_is_hip = is_hip()
# VLM models for testing # VLM models for testing
MODELS = [ if _is_hip:
SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45), MODELS = [SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4)]
SimpleNamespace( else:
model="Qwen/Qwen2.5-VL-3B-Instruct", MODELS = [
mmmu_accuracy=0.4, SimpleNamespace(model="google/gemma-3-27b-it", mmmu_accuracy=0.45),
), SimpleNamespace(model="Qwen/Qwen2.5-VL-3B-Instruct", mmmu_accuracy=0.4),
SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4), SimpleNamespace(model="openbmb/MiniCPM-V-2_6", mmmu_accuracy=0.4),
] ]
# Set default mem_fraction_static to 0.8 # Set default mem_fraction_static to 0.8
DEFAULT_MEM_FRACTION_STATIC = 0.8 DEFAULT_MEM_FRACTION_STATIC = 0.8
......
...@@ -125,6 +125,7 @@ suites = { ...@@ -125,6 +125,7 @@ suites = {
TestFile("test_torch_compile_moe.py", 210), TestFile("test_torch_compile_moe.py", 210),
TestFile("test_torch_native_attention_backend.py", 123), TestFile("test_torch_native_attention_backend.py", 123),
TestFile("test_torchao.py", 70), TestFile("test_torchao.py", 70),
TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_attention_backend.py", 150), TestFile("test_triton_attention_backend.py", 150),
TestFile("test_triton_attention_kernels.py", 4), TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_moe_channel_fp8_kernel.py", 25), TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
...@@ -372,9 +373,9 @@ suites = { ...@@ -372,9 +373,9 @@ suites = {
suite_amd = { suite_amd = {
"per-commit-amd": [ "per-commit-amd": [
TestFile("function_call/test_json_schema_constraint.py", 1), TestFile("function_call/test_json_schema_constraint.py", 1),
TestFile("hicache/test_hicache.py", 116), # TestFile("hicache/test_hicache.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
TestFile("hicache/test_hicache_mla.py", 127), # TestFile("hicache/test_hicache_mla.py", 127), # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574
TestFile("hicache/test_hicache_storage.py", 127), # TestFile("hicache/test_hicache_storage.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
TestFile("lora/test_lora.py", 150), TestFile("lora/test_lora.py", 150),
TestFile("lora/test_lora_backend.py", 99), TestFile("lora/test_lora_backend.py", 99),
TestFile("lora/test_lora_cuda_graph.py", 250), TestFile("lora/test_lora_cuda_graph.py", 250),
...@@ -385,8 +386,9 @@ suite_amd = { ...@@ -385,8 +386,9 @@ suite_amd = {
TestFile("models/test_qwen_models.py", 82), TestFile("models/test_qwen_models.py", 82),
TestFile("models/test_reward_models.py", 132), TestFile("models/test_reward_models.py", 132),
TestFile("models/test_transformers_models.py", 320), TestFile("models/test_transformers_models.py", 320),
TestFile("openai_server/basic/test_openai_embedding.py", 79), TestFile("models/test_vlm_models.py", 437),
TestFile("openai_server/basic/test_openai_server.py", 270), TestFile("openai_server/basic/test_openai_embedding.py", 141),
TestFile("openai_server/basic/test_openai_server.py", 149),
TestFile("openai_server/basic/test_protocol.py", 10), TestFile("openai_server/basic/test_protocol.py", 10),
TestFile("openai_server/basic/test_serving_chat.py", 10), TestFile("openai_server/basic/test_serving_chat.py", 10),
TestFile("openai_server/basic/test_serving_completions.py", 10), TestFile("openai_server/basic/test_serving_completions.py", 10),
...@@ -409,14 +411,15 @@ suite_amd = { ...@@ -409,14 +411,15 @@ suite_amd = {
TestFile("test_create_kvindices.py", 2), TestFile("test_create_kvindices.py", 2),
TestFile("test_eval_fp8_accuracy.py", 303), TestFile("test_eval_fp8_accuracy.py", 303),
TestFile("test_function_call_parser.py", 10), TestFile("test_function_call_parser.py", 10),
TestFile("test_fused_moe.py", 80), TestFile("test_fused_moe.py", 30),
TestFile("test_harmony_parser.py", 20),
TestFile("test_input_embeddings.py", 38), TestFile("test_input_embeddings.py", 38),
TestFile("test_io_struct.py", 8), TestFile("test_io_struct.py", 8),
TestFile("test_jinja_template_utils.py", 1), TestFile("test_jinja_template_utils.py", 1),
TestFile("test_metrics.py", 32), TestFile("test_metrics.py", 32),
TestFile("test_metrics_utils.py", 1), TestFile("test_metrics_utils.py", 1),
TestFile("test_mla.py", 242), TestFile("test_mla.py", 242),
TestFile("test_mla_deepseek_v3.py", 221), # TestFile("test_mla_deepseek_v3.py", 221), # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574
TestFile("test_no_chunked_prefill.py", 108), TestFile("test_no_chunked_prefill.py", 108),
TestFile("test_page_size.py", 60), TestFile("test_page_size.py", 60),
TestFile("test_penalty.py", 180), TestFile("test_penalty.py", 180),
...@@ -433,9 +436,12 @@ suite_amd = { ...@@ -433,9 +436,12 @@ suite_amd = {
TestFile("test_torch_compile.py", 169), TestFile("test_torch_compile.py", 169),
TestFile("test_torch_compile_moe.py", 210), TestFile("test_torch_compile_moe.py", 210),
TestFile("test_torch_native_attention_backend.py", 123), TestFile("test_torch_native_attention_backend.py", 123),
TestFile("test_triton_attention_backend.py", 250), # TestFile("test_triton_attention_kernels.py", 4),
TestFile("test_triton_attention_backend.py", 150),
TestFile("test_triton_sliding_window.py", 250),
TestFile("test_wave_attention_kernels.py", 2), TestFile("test_wave_attention_kernels.py", 2),
# Disabled temporarily # Disabled temporarily
# TestFile("test_vlm_input_format.py", 300),
# TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127 # TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
# TestFile("openai_server/features/test_openai_server_hidden_states.py", 240), # TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
# TestFile("rl/test_update_weights_from_tensor.py", 48), # TestFile("rl/test_update_weights_from_tensor.py", 48),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment