run_suite.py 19.6 KB
Newer Older
1
2
import argparse
import glob
Lianmin Zheng's avatar
Lianmin Zheng committed
3
from dataclasses import dataclass
4
5
6

from sglang.test.test_utils import run_unittest_files

Lianmin Zheng's avatar
Lianmin Zheng committed
7
8
9
10
11
12
13

@dataclass
class TestFile:
    """One test file listed in a suite, together with a rough runtime estimate."""

    # Path of the test file as written in the suite tables (e.g. "lora/test_lora.py").
    name: str
    # Weight used by auto_partition to balance partitions; presumably seconds
    # (TODO confirm). Entries that omit it get the default of 60.
    estimated_time: float = 60


14
# NOTE: please sort the test cases alphabetically by the test file name
15
# Maps a suite name (selectable with --suite) to the test files it runs.
# The second TestFile argument is the estimated runtime used by auto_partition.
# List order matters: --range-begin/--range-end slice these lists by index.
suites = {
    "per-commit-1-gpu": [
        TestFile("function_call/test_json_schema_constraint.py", 30),
        TestFile("hicache/test_hicache.py", 116),
        TestFile("hicache/test_hicache_eagle.py", 150),
        TestFile("hicache/test_hicache_mla.py", 127),
        TestFile("hicache/test_hicache_storage.py", 127),
        TestFile("lora/test_lora.py", 200),
        TestFile("lora/test_lora_eviction.py", 200),
        TestFile("lora/test_lora_eviction_policy.py", 200),
        TestFile("lora/test_lora_backend.py", 99),
        TestFile("lora/test_lora_qwen3.py", 97),
        TestFile("lora/test_lora_radix_cache.py", 100),
        TestFile("lora/test_lora_update.py", 400),
        TestFile("lora/test_multi_lora_backend.py", 60),
        TestFile("models/test_compressed_tensors_models.py", 42),
        TestFile("models/test_cross_encoder_models.py", 100),
        TestFile("models/test_embedding_models.py", 73),
        TestFile("models/test_encoder_embedding_models.py", 460),
        TestFile("models/test_generation_models.py", 103),
        TestFile("models/test_nvidia_nemotron_nano_v2.py", 180),
        TestFile("models/test_qwen_models.py", 82),
        TestFile("batch_invariant/test_batch_invariant_ops.py", 10),
        TestFile("models/test_reward_models.py", 132),
        TestFile("models/test_transformers_models.py", 320),
        TestFile("models/test_vlm_models.py", 741),
        TestFile("openai_server/basic/test_openai_embedding.py", 141),
        TestFile("openai_server/basic/test_openai_server.py", 149),
        TestFile("openai_server/basic/test_protocol.py", 10),
        TestFile("openai_server/basic/test_serving_chat.py", 10),
        TestFile("openai_server/basic/test_serving_completions.py", 10),
        TestFile("openai_server/basic/test_serving_embedding.py", 10),
        TestFile("openai_server/features/test_enable_thinking.py", 70),
        TestFile("openai_server/features/test_json_constrained.py", 98),
        TestFile("openai_server/features/test_json_mode.py", 90),
        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
        TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
        TestFile("openai_server/features/test_reasoning_content.py", 89),
        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
        TestFile("openai_server/function_call/test_tool_choice.py", 226),
        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
        TestFile("openai_server/validation/test_matched_stop.py", 60),
        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
        TestFile("openai_server/validation/test_request_length_validation.py", 31),
        TestFile("quant/test_block_int8.py", 22),
        TestFile("quant/test_fp8_kernel.py", 8),
        TestFile("quant/test_int8_kernel.py", 8),
        TestFile("quant/test_triton_scaled_mm.py", 8),
        TestFile("quant/test_w8a8_quantization.py", 46),
        TestFile("rl/test_fp32_lm_head.py", 30),
        TestFile("rl/test_update_weights_from_disk.py", 114),
        TestFile("rl/test_update_weights_from_tensor.py", 48),
        TestFile("test_abort.py", 51),
        TestFile("test_build_eagle_tree.py", 8),
        TestFile("test_chunked_prefill.py", 313),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_deterministic.py", 320),
        TestFile("test_eagle_infer_a.py", 370),
        TestFile("test_eagle_infer_b.py", 700),
        TestFile("test_eagle_infer_beta.py", 300),
        TestFile("test_ebnf_constrained.py", 108),
        TestFile("test_eval_fp8_accuracy.py", 303),
        TestFile("test_fa3.py", 376),
        # TestFile("test_flashmla.py", 352),
        TestFile("test_function_call_parser.py", 10),
        TestFile("test_fused_moe.py", 30),
        TestFile("test_gpt_oss_1gpu.py", 600),
        TestFile("test_harmony_parser.py", 20),
        TestFile("test_hidden_states.py", 55),
        TestFile("test_hybrid_attn_backend.py", 379),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
        TestFile("test_mamba_unittest.py", 4),
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 167),
        TestFile("test_mla_deepseek_v3.py", 500),
        TestFile("test_mla_flashinfer.py", 302),
        TestFile("test_mla_fp8.py", 93),
        TestFile("test_mla_int8_deepseek_v3.py", 429),
        TestFile("test_modelopt_loader.py", 30),
        TestFile("test_multi_tokenizer.py", 230),
        TestFile("test_ngram_speculative_decoding.py", 250),
        TestFile("test_no_chunked_prefill.py", 108),
        TestFile("test_no_overlap_scheduler.py", 234),
        TestFile("test_original_logprobs.py", 41),
        TestFile("test_page_size.py", 60),
        TestFile("test_penalty.py", 41),
        TestFile("test_priority_scheduling.py", 100),
        TestFile("test_pytorch_sampling_backend.py", 66),
        TestFile("test_radix_attention.py", 105),
        TestFile("test_radix_cache_unit.py", 5),
        TestFile("test_reasoning_parser.py", 5),
        TestFile("test_regex_constrained.py", 64),
        TestFile("test_request_queue_validation.py", 30),
        TestFile("test_retract_decode.py", 54),
        TestFile("test_score_api.py", 310),
        TestFile("test_server_args.py", 1),
        TestFile("test_skip_tokenizer_init.py", 117),
        TestFile("test_srt_endpoint.py", 130),
        TestFile("test_srt_engine.py", 261),
        TestFile("test_standalone_speculative_decoding.py", 250),
        TestFile("test_start_profile.py", 60),
        TestFile("test_profile_merger.py", 60),
        TestFile("test_profile_merger_http_api.py", 15),
        TestFile("test_swa_unittest.py", 1),
        TestFile("test_torch_compile.py", 76),
        TestFile("test_torch_compile_moe.py", 172),
        TestFile("test_torch_native_attention_backend.py", 123),
        TestFile("test_torchao.py", 70),
        TestFile("test_triton_attention_backend.py", 150),
        TestFile("test_triton_attention_kernels.py", 4),
        TestFile("test_triton_moe_channel_fp8_kernel.py", 25),
        TestFile("test_triton_sliding_window.py", 250),
        TestFile("test_utils_update_weights.py", 48),
        TestFile("test_vision_chunked_prefill.py", 175),
        TestFile("test_vision_openai_server_a.py", 608),
        TestFile("test_vlm_input_format.py", 300),
    ],
    "per-commit-2-gpu": [
        TestFile("ep/test_moe_ep.py", 140),
        TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
        TestFile("hicache/test_hicache_storage_file_backend.py", 200),
        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
        TestFile("layers/attention/mamba/test_mamba2_mixer.py", 110),
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("models/test_glm4_moe_models.py", 100),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
        TestFile("test_disaggregation_basic.py", 400),
        TestFile("test_dp_attention.py", 594),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        TestFile("test_patch_torch.py", 19),
        TestFile("test_release_memory_occupation.py", 257),
    ],
    "per-commit-4-gpu": [
        TestFile("models/test_qwen3_next_models.py", 291),
        TestFile("test_disaggregation_dp_attention.py", 155),
        TestFile("test_gpt_oss_4gpu.py", 300),
        TestFile("test_local_attn.py", 411),
        TestFile("test_multi_instance_release_memory_occupation.py", 64),
        TestFile("test_pp_single_node.py", 481),
    ],
    "per-commit-8-gpu-h200": [
        TestFile("lora/test_lora_llama4.py", 400),
        TestFile("test_deepseek_v3_basic.py", 275),
        TestFile("test_deepseek_v3_mtp.py", 275),
        TestFile("test_disaggregation_hybrid_attention.py", 200),
    ],
    "per-commit-8-gpu-h20": [
        TestFile("quant/test_w4a8_deepseek_v3.py", 371),
        TestFile("test_disaggregation_different_tp.py", 600),
        TestFile("test_disaggregation_pp.py", 140),
    ],
    "per-commit-4-gpu-b200": [
        # TestFile("test_flash_attention_4.py"),
        # TestFile("test_gpt_oss_4gpu.py", 600),
        # TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
    ],
    "per-commit-4-gpu-deepep": [
        TestFile("ep/test_deepep_small.py", 531),
        TestFile("ep/test_mooncake_ep_small.py", 450),
    ],
    "per-commit-8-gpu-h200-deepep": [
        TestFile("ep/test_deepep_large.py", 338),
    ],
    "per-commit-8-gpu-h200-deepseek-v32": [
        TestFile("test_deepseek_v32_basic.py", 275),
        TestFile("test_deepseek_v32_mtp.py", 275),
    ],
    "vllm_dependency_test": [
        TestFile("quant/test_awq.py", 163),
        TestFile("test_bnb.py", 5),
        TestFile("test_gptqmodel_dynamic.py", 102),
        TestFile("test_vllm_dependency.py", 185),
        # TestFile("test_gguf.py", 96),
    ],
    # If the test cases take too long, consider adding them to nightly tests instead of per-commit tests
    "nightly-1-gpu": [],
    "nightly-8-gpu": [],
}

# Add AMD tests
200
# NOTE: please sort the test cases alphabetically by the test file name
Lianmin Zheng's avatar
Lianmin Zheng committed
201
# AMD suites, merged into `suites` further down via suites.update(suite_amd).
# List order matters: --range-begin/--range-end slice these lists by index.
suite_amd = {
    "per-commit-amd": [
        TestFile("function_call/test_json_schema_constraint.py", 30),
        TestFile("hicache/test_hicache.py", 116),
        TestFile("hicache/test_hicache_mla.py", 127),
        TestFile("hicache/test_hicache_storage.py", 127),
        TestFile("lora/test_lora.py", 200),
        TestFile("lora/test_lora_backend.py", 99),
        TestFile("lora/test_lora_cuda_graph.py", 250),
        TestFile("lora/test_lora_eviction.py", 200),
        TestFile("lora/test_lora_qwen3.py", 97),
        TestFile("lora/test_multi_lora_backend.py", 60),
        TestFile("models/test_compressed_tensors_models.py", 42),
        TestFile("models/test_qwen_models.py", 82),
        TestFile("models/test_reward_models.py", 132),
        TestFile("models/test_transformers_models.py", 320),
        TestFile("openai_server/basic/test_openai_embedding.py", 141),
        TestFile("openai_server/basic/test_openai_server.py", 149),
        TestFile("openai_server/basic/test_protocol.py", 10),
        TestFile("openai_server/basic/test_serving_chat.py", 10),
        TestFile("openai_server/basic/test_serving_completions.py", 10),
        TestFile("openai_server/basic/test_serving_embedding.py", 10),
        TestFile("openai_server/features/test_enable_thinking.py", 70),
        TestFile("openai_server/features/test_json_constrained.py", 98),
        TestFile("openai_server/features/test_json_mode.py", 90),
        TestFile("openai_server/features/test_openai_server_ebnf.py", 95),
        TestFile("openai_server/features/test_reasoning_content.py", 89),
        TestFile("openai_server/function_call/test_openai_function_calling.py", 60),
        TestFile("openai_server/function_call/test_tool_choice.py", 226),
        TestFile("openai_server/validation/test_large_max_new_tokens.py", 41),
        TestFile("openai_server/validation/test_matched_stop.py", 60),
        TestFile("openai_server/validation/test_openai_server_ignore_eos.py", 85),
        TestFile("openai_server/validation/test_request_length_validation.py", 31),
        TestFile("quant/test_awq_dequant.py", 2),
        TestFile("quant/test_block_int8.py", 22),
        TestFile("rl/test_update_weights_from_disk.py", 114),
        TestFile("test_abort.py", 51),
        TestFile("test_chunked_prefill.py", 313),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_ebnf_constrained.py", 108),
        TestFile("test_eval_fp8_accuracy.py", 303),
        TestFile("test_function_call_parser.py", 10),
        TestFile("test_fused_moe.py", 30),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 242),
        TestFile("test_mla_deepseek_v3.py", 221),
        TestFile("test_no_chunked_prefill.py", 108),
        TestFile("test_page_size.py", 60),
        TestFile("test_penalty.py", 41),
        TestFile("test_pytorch_sampling_backend.py", 66),
        TestFile("test_radix_attention.py", 105),
        TestFile("test_reasoning_parser.py", 5),
        TestFile("test_regex_constrained.py", 64),
        TestFile("test_retract_decode.py", 54),
        TestFile("test_rope_rocm.py", 3),
        TestFile("test_server_args.py", 1),
        TestFile("test_skip_tokenizer_init.py", 117),
        TestFile("test_srt_endpoint.py", 130),
        TestFile("test_srt_engine.py", 261),
        TestFile("test_torch_compile.py", 169),
        TestFile("test_torch_compile_moe.py", 172),
        TestFile("test_torch_native_attention_backend.py", 123),
        TestFile("test_triton_attention_backend.py", 150),
        TestFile("test_wave_attention_kernels.py", 2),
        # Disabled temporarily
        # TestFile("models/test_embedding_models.py", 73), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
        # TestFile("openai_server/features/test_openai_server_hidden_states.py", 240),
        # TestFile("rl/test_update_weights_from_tensor.py", 48),
        # TestFile("test_no_overlap_scheduler.py", 234), # Disabled temporarily and track in #7703
        # TestFile("test_vision_chunked_prefill.py", 175), # Disabled temporarily and track in #7701
        # TestFile("test_wave_attention_backend.py", 150), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
    ],
    "per-commit-amd-mi35x": [
        TestFile("test_gpt_oss_1gpu.py", 600),
        TestFile("test_mla.py", 242),
    ],
    "per-commit-2-gpu-amd": [
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        # TestFile("test_patch_torch.py", 19), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/11127
    ],
    "per-commit-4-gpu-amd": [
        TestFile("test_pp_single_node.py", 150),
    ],
    "per-commit-8-gpu-amd": [
        TestFile("test_deepseek_v3_basic.py", 275),
        TestFile("test_deepseek_v3_mtp.py", 275),
    ],
    "nightly-amd": [
        # No explicit estimate: TestFile's default estimated_time (60) applies.
        TestFile("test_nightly_gsm8k_eval_amd.py"),
    ],
}

# Add Intel Xeon tests
301
# NOTE: please sort the test cases alphabetically by the test file name
Lianmin Zheng's avatar
Lianmin Zheng committed
302
# Intel Xeon (CPU) suite, merged into `suites` further down via
# suites.update(suite_xeon). No entry gives an explicit estimate, so each one
# uses TestFile's default estimated_time (60).
suite_xeon = {
    "per-commit-cpu": [
        TestFile("cpu/test_activation.py"),
        TestFile("cpu/test_binding.py"),
        TestFile("cpu/test_decode.py"),
        TestFile("cpu/test_extend.py"),
        TestFile("cpu/test_gemm.py"),
        TestFile("cpu/test_mla.py"),
        TestFile("cpu/test_moe.py"),
        TestFile("cpu/test_norm.py"),
        TestFile("cpu/test_qkv_proj_with_rope.py"),
        TestFile("cpu/test_rope.py"),
        TestFile("cpu/test_shared_expert.py"),
        TestFile("cpu/test_topk.py"),
        # NOTE(review): the four entries below break the alphabetical convention.
        # Their order is kept as-is because --range-begin/--range-end index into
        # this list, so reordering would change which files a given range runs.
        TestFile("cpu/test_cpu_graph.py"),
        TestFile("cpu/test_intel_amx_attention_backend_a.py"),
        TestFile("cpu/test_intel_amx_attention_backend_b.py"),
        TestFile("cpu/test_intel_amx_attention_backend_c.py"),
    ],
}

323
324
325
326
327
328
329
# Add Intel XPU tests
suite_xpu = {
    # Becomes a valid --suite choice once suite_xpu is merged into `suites`.
    "per-commit-xpu": [
        # No explicit estimate: TestFile's default estimated_time (60) applies.
        TestFile("xpu/test_intel_xpu_backend.py"),
    ],
}

Lianmin Zheng's avatar
Lianmin Zheng committed
330
# Add Ascend NPU tests
331
# NOTE: please sort the test cases alphabetically by the test file name
Lianmin Zheng's avatar
Lianmin Zheng committed
332
333
# Ascend NPU suites, merged into `suites` further down via
# suites.update(suite_ascend).
suite_ascend = {
    "per-commit-1-ascend-npu": [
        TestFile("ascend/test_ascend_graph_tp1_bf16.py", 400),
        TestFile("ascend/test_ascend_tp1_bf16.py", 400),
    ],
    "per-commit-2-ascend-npu": [
        TestFile("ascend/test_ascend_graph_tp2_bf16.py", 400),
        TestFile("ascend/test_ascend_mla_fia_w8a8int8.py", 400),
        TestFile("ascend/test_ascend_tp2_bf16.py", 400),
        TestFile("ascend/test_ascend_tp2_fia_bf16.py", 400),
    ],
    "per-commit-4-ascend-npu": [
        TestFile("ascend/test_ascend_mla_w8a8int8.py", 400),
        TestFile("ascend/test_ascend_tp4_bf16.py", 400),
    ],
    "per-commit-16-ascend-a3": [
        TestFile("ascend/test_ascend_deepep.py", 400),
    ],
}

Lianmin Zheng's avatar
Lianmin Zheng committed
352
353
354
# Merge the platform-specific tables into `suites` so that every one of their
# keys is accepted by --suite. Later updates win if a key is ever repeated.
suites.update(suite_amd)
suites.update(suite_xeon)
suites.update(suite_ascend)
suites.update(suite_xpu)
Lianmin Zheng's avatar
Lianmin Zheng committed
356

Lianmin Zheng's avatar
Lianmin Zheng committed
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400

def auto_partition(files, rank, size):
    """
    Partition files into size sublists with approximately equal sums of estimated times
    and return the partition for the specified rank.

    Files are visited from the largest to the smallest estimated time (files with
    equal estimates keep their original relative order), and each one is assigned
    to the partition whose running total is currently the smallest (the lowest
    partition index wins ties).

    Args:
        files (list): List of file objects with estimated_time attribute
        rank (int): Index of the partition to return (0 to size-1)
        size (int): Number of partitions. It may exceed len(files); the surplus
            partitions are then simply empty.

    Returns:
        list: List of file objects in the specified rank's partition. Empty when
            files is empty or size is not positive.

    Raises:
        IndexError: If rank is not a valid index into the size partitions.
    """
    if not files or size <= 0:
        return []

    # sorted() is stable even with reverse=True, so equal estimates stay in
    # their original order while heavier files come first.
    order = sorted(
        range(len(files)),
        key=lambda i: files[i].estimated_time,
        reverse=True,
    )

    partitions = [[] for _ in range(size)]
    sums = [0.0] * size

    # Greedy approach: give each file to the partition with the smallest current sum.
    # With more partitions than files, every file lands in its own partition and
    # the remaining partitions stay empty instead of all files being dropped.
    for idx in order:
        lightest = sums.index(min(sums))
        partitions[lightest].append(idx)
        sums[lightest] += files[idx].estimated_time

    return [files[i] for i in partitions[rank]]

401
402
403
404
405
406

if __name__ == "__main__":
    # Command-line entry point: pick a suite, optionally narrow it to one
    # partition or index range, run the selected files and exit with the
    # runner's exit code.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1200,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    arg_parser.add_argument(
        "--range-begin",
        type=int,
        default=0,
        help="The begin index of the range of the files to run.",
    )
    arg_parser.add_argument(
        "--range-end",
        type=int,
        default=None,
        help="The end index of the range of the files to run.",
    )
    arg_parser.add_argument(
        "--auto-partition-id",
        type=int,
        help="Use auto load balancing. The part id.",
    )
    arg_parser.add_argument(
        "--auto-partition-size",
        type=int,
        help="Use auto load balancing. The number of parts.",
    )
    args = arg_parser.parse_args()
    print(f"{args=}")

    if args.suite == "all":
        # glob returns plain path strings, but everything below reads .name and
        # .estimated_time, so wrap each path in a TestFile. Sorting keeps the
        # order (and therefore any index range) the same on every machine.
        files = [
            TestFile(path)
            for path in sorted(glob.glob("**/test_*.py", recursive=True))
        ]
    else:
        files = suites[args.suite]

    if args.auto_partition_size:
        part_id = args.auto_partition_id
        # Reject a missing or out-of-range id up front: a None id would crash
        # inside auto_partition and a negative one would silently select a
        # different partition through Python's negative indexing.
        if part_id is None or not 0 <= part_id < args.auto_partition_size:
            arg_parser.error(
                "--auto-partition-id must be given and satisfy "
                "0 <= id < --auto-partition-size"
            )
        files = auto_partition(files, part_id, args.auto_partition_size)
    else:
        files = files[args.range_begin : args.range_end]

    print("The running tests are ", [f.name for f in files])

    exit_code = run_unittest_files(files, args.timeout_per_file)
    # SystemExit works even when the `site` module (which provides exit()) is not loaded.
    raise SystemExit(exit_code)