Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
35ca04d2
Unverified
Commit
35ca04d2
authored
Apr 27, 2025
by
Lianmin Zheng
Committed by
GitHub
Apr 27, 2025
Browse files
[CI] fix port conflicts (#5789)
parent
3c4e0ee6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
55 additions
and
51 deletions
+55
-51
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+3
-3
python/sglang/bench_serving.py
python/sglang/bench_serving.py
+4
-7
python/sglang/srt/entrypoints/http_server.py
python/sglang/srt/entrypoints/http_server.py
+3
-1
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+0
-1
test/srt/run_suite.py
test/srt/run_suite.py
+25
-25
test/srt/test_torch_compile_moe.py
test/srt/test_torch_compile_moe.py
+1
-1
test/srt/test_update_weights_from_distributed.py
test/srt/test_update_weights_from_distributed.py
+19
-13
No files found.
.github/workflows/pr-test.yml
View file @
35ca04d2
...
@@ -54,7 +54,7 @@ jobs:
...
@@ -54,7 +54,7 @@ jobs:
strategy
:
strategy
:
fail-fast
:
false
fail-fast
:
false
matrix
:
matrix
:
part
:
[
0
,
1
,
2
,
3
,
4
,
5
,
6
]
part
:
[
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
]
steps
:
steps
:
-
name
:
Checkout code
-
name
:
Checkout code
uses
:
actions/checkout@v4
uses
:
actions/checkout@v4
...
@@ -64,10 +64,10 @@ jobs:
...
@@ -64,10 +64,10 @@ jobs:
bash scripts/ci_install_dependency.sh
bash scripts/ci_install_dependency.sh
-
name
:
Run test
-
name
:
Run test
timeout-minutes
:
4
0
timeout-minutes
:
3
0
run
:
|
run
:
|
cd test/srt
cd test/srt
python3 run_suite.py --suite per-commit --auto-partition-id ${{ matrix.part }} --auto-partition-size
7
python3 run_suite.py --suite per-commit --auto-partition-id ${{ matrix.part }} --auto-partition-size
8
unit-test-backend-2-gpu
:
unit-test-backend-2-gpu
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
...
...
python/sglang/bench_serving.py
View file @
35ca04d2
...
@@ -977,6 +977,7 @@ async def benchmark(
...
@@ -977,6 +977,7 @@ async def benchmark(
profile
:
bool
,
profile
:
bool
,
pd_seperated
:
bool
=
False
,
pd_seperated
:
bool
=
False
,
flush_cache
:
bool
=
False
,
flush_cache
:
bool
=
False
,
warmup_requests
:
int
=
1
,
):
):
if
backend
in
ASYNC_REQUEST_FUNCS
:
if
backend
in
ASYNC_REQUEST_FUNCS
:
request_func
=
ASYNC_REQUEST_FUNCS
[
backend
]
request_func
=
ASYNC_REQUEST_FUNCS
[
backend
]
...
@@ -993,10 +994,8 @@ async def benchmark(
...
@@ -993,10 +994,8 @@ async def benchmark(
async
with
semaphore
:
async
with
semaphore
:
return
await
request_func
(
request_func_input
=
request_func_input
,
pbar
=
pbar
)
return
await
request_func
(
request_func_input
=
request_func_input
,
pbar
=
pbar
)
if
not
hasattr
(
args
,
"warmup_requests"
):
args
.
warmup_requests
=
1
# Warmup
# Warmup
print
(
f
"Starting warmup with
{
args
.
warmup_requests
}
sequences..."
)
print
(
f
"Starting warmup with
{
warmup_requests
}
sequences..."
)
# Use the first request for all warmup iterations
# Use the first request for all warmup iterations
test_prompt
,
test_prompt_len
,
test_output_len
=
input_requests
[
0
]
test_prompt
,
test_prompt_len
,
test_output_len
=
input_requests
[
0
]
...
@@ -1018,7 +1017,7 @@ async def benchmark(
...
@@ -1018,7 +1017,7 @@ async def benchmark(
# Run warmup requests
# Run warmup requests
warmup_tasks
=
[]
warmup_tasks
=
[]
for
_
in
range
(
args
.
warmup_requests
):
for
_
in
range
(
warmup_requests
):
warmup_tasks
.
append
(
warmup_tasks
.
append
(
asyncio
.
create_task
(
request_func
(
request_func_input
=
test_input
))
asyncio
.
create_task
(
request_func
(
request_func_input
=
test_input
))
)
)
...
@@ -1026,9 +1025,7 @@ async def benchmark(
...
@@ -1026,9 +1025,7 @@ async def benchmark(
warmup_outputs
=
await
asyncio
.
gather
(
*
warmup_tasks
)
warmup_outputs
=
await
asyncio
.
gather
(
*
warmup_tasks
)
# Check if at least one warmup request succeeded
# Check if at least one warmup request succeeded
if
args
.
warmup_requests
>
0
and
not
any
(
if
warmup_requests
>
0
and
not
any
(
output
.
success
for
output
in
warmup_outputs
):
output
.
success
for
output
in
warmup_outputs
):
raise
ValueError
(
raise
ValueError
(
"Warmup failed - Please make sure benchmark arguments "
"Warmup failed - Please make sure benchmark arguments "
f
"are correctly specified. Error:
{
warmup_outputs
[
0
].
error
}
"
f
"are correctly specified. Error:
{
warmup_outputs
[
0
].
error
}
"
...
...
python/sglang/srt/entrypoints/http_server.py
View file @
35ca04d2
...
@@ -281,7 +281,9 @@ async def generate_from_file_request(file: UploadFile, request: Request):
...
@@ -281,7 +281,9 @@ async def generate_from_file_request(file: UploadFile, request: Request):
)
)
try
:
try
:
ret
=
await
_global_state
.
generate_request
(
obj
,
request
).
__anext__
()
ret
=
await
_global_state
.
tokenizer_manager
.
generate_request
(
obj
,
request
).
__anext__
()
return
ret
return
ret
except
ValueError
as
e
:
except
ValueError
as
e
:
logger
.
error
(
f
"Error:
{
e
}
"
)
logger
.
error
(
f
"Error:
{
e
}
"
)
...
...
python/sglang/test/test_utils.py
View file @
35ca04d2
...
@@ -8,7 +8,6 @@ import random
...
@@ -8,7 +8,6 @@ import random
import
subprocess
import
subprocess
import
threading
import
threading
import
time
import
time
import
traceback
import
unittest
import
unittest
from
concurrent.futures
import
ThreadPoolExecutor
from
concurrent.futures
import
ThreadPoolExecutor
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
...
...
test/srt/run_suite.py
View file @
35ca04d2
...
@@ -14,7 +14,7 @@ class TestFile:
...
@@ -14,7 +14,7 @@ class TestFile:
suites
=
{
suites
=
{
"per-commit"
:
[
"per-commit"
:
[
TestFile
(
"models/lora/test_lora.py"
,
76
),
TestFile
(
"models/lora/test_lora.py"
,
76
),
TestFile
(
"models/lora/test_lora_backend.py"
,
420
),
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/test_embedding_models.py"
,
35
),
TestFile
(
"models/test_embedding_models.py"
,
35
),
TestFile
(
"models/test_generation_models.py"
,
103
),
TestFile
(
"models/test_generation_models.py"
,
103
),
...
@@ -23,30 +23,30 @@ suites = {
...
@@ -23,30 +23,30 @@ suites = {
TestFile
(
"models/test_compressed_tensors_models.py"
,
100
),
TestFile
(
"models/test_compressed_tensors_models.py"
,
100
),
TestFile
(
"models/test_reward_models.py"
,
83
),
TestFile
(
"models/test_reward_models.py"
,
83
),
TestFile
(
"models/test_gme_qwen_models.py"
,
45
),
TestFile
(
"models/test_gme_qwen_models.py"
,
45
),
TestFile
(
"models/test_clip_models.py"
,
100
),
TestFile
(
"models/test_clip_models.py"
,
52
),
TestFile
(
"models/test_vlm_models.py"
,
100
),
TestFile
(
"models/test_vlm_models.py"
,
581
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_abort.py"
,
51
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_block_int8.py"
,
22
),
TestFile
(
"test_chunked_prefill.py"
,
336
),
TestFile
(
"test_chunked_prefill.py"
,
285
),
TestFile
(
"test_eagle_infer.py"
,
5
00
),
TestFile
(
"test_eagle_infer.py"
,
5
84
),
TestFile
(
"test_ebnf_constrained.py"
),
TestFile
(
"test_ebnf_constrained.py"
),
TestFile
(
"test_fa3.py"
,
400
),
TestFile
(
"test_fa3.py"
,
376
),
TestFile
(
"test_fp8_kernel.py"
,
8
),
TestFile
(
"test_fp8_kernel.py"
,
8
),
TestFile
(
"test_embedding_openai_server.py"
,
36
),
TestFile
(
"test_embedding_openai_server.py"
,
141
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_hidden_states.py"
,
55
),
TestFile
(
"test_int8_kernel.py"
,
8
),
TestFile
(
"test_int8_kernel.py"
,
8
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_input_embeddings.py"
,
38
),
TestFile
(
"test_json_constrained.py"
,
98
),
TestFile
(
"test_json_constrained.py"
,
98
),
TestFile
(
"test_large_max_new_tokens.py"
,
41
),
TestFile
(
"test_large_max_new_tokens.py"
,
41
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_metrics.py"
,
32
),
TestFile
(
"test_mla.py"
,
16
2
),
TestFile
(
"test_mla.py"
,
24
2
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_mla_deepseek_v3.py"
,
221
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
522
),
TestFile
(
"test_mla_int8_deepseek_v3.py"
,
674
),
TestFile
(
"test_mla_flashinfer.py"
,
395
),
TestFile
(
"test_mla_flashinfer.py"
,
395
),
TestFile
(
"test_mla_fp8.py"
,
9
3
),
TestFile
(
"test_mla_fp8.py"
,
15
3
),
TestFile
(
"test_no_chunked_prefill.py"
,
126
),
TestFile
(
"test_no_chunked_prefill.py"
,
126
),
TestFile
(
"test_no_overlap_scheduler.py"
,
262
),
TestFile
(
"test_no_overlap_scheduler.py"
,
262
),
TestFile
(
"test_openai_server.py"
,
1
86
),
TestFile
(
"test_openai_server.py"
,
1
49
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_penalty.py"
,
41
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_page_size.py"
,
60
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
TestFile
(
"test_pytorch_sampling_backend.py"
,
66
),
...
@@ -57,11 +57,11 @@ suites = {
...
@@ -57,11 +57,11 @@ suites = {
TestFile
(
"test_request_length_validation.py"
,
31
),
TestFile
(
"test_request_length_validation.py"
,
31
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_retract_decode.py"
,
54
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_server_args.py"
,
1
),
TestFile
(
"test_skip_tokenizer_init.py"
,
7
2
),
TestFile
(
"test_skip_tokenizer_init.py"
,
11
7
),
TestFile
(
"test_srt_engine.py"
,
237
),
TestFile
(
"test_srt_engine.py"
,
237
),
TestFile
(
"test_srt_endpoint.py"
,
94
),
TestFile
(
"test_srt_endpoint.py"
,
94
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile.py"
,
76
),
TestFile
(
"test_torch_compile_moe.py"
,
8
5
),
TestFile
(
"test_torch_compile_moe.py"
,
23
5
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torch_native_attention_backend.py"
,
123
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_torchao.py"
,
70
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
TestFile
(
"test_triton_attention_kernels.py"
,
4
),
...
@@ -69,27 +69,27 @@ suites = {
...
@@ -69,27 +69,27 @@ suites = {
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_update_weights_from_disk.py"
,
114
),
TestFile
(
"test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_update_weights_from_tensor.py"
,
48
),
TestFile
(
"test_vertex_endpoint.py"
,
31
),
TestFile
(
"test_vertex_endpoint.py"
,
31
),
TestFile
(
"test_vision_chunked_prefill.py"
,
9
9
),
TestFile
(
"test_vision_chunked_prefill.py"
,
11
9
),
TestFile
(
"test_vlm_accuracy.py"
,
60
),
TestFile
(
"test_vlm_accuracy.py"
,
60
),
TestFile
(
"test_vision_openai_server.py"
,
5
37
),
TestFile
(
"test_vision_openai_server.py"
,
6
37
),
TestFile
(
"test_fim_completion.py"
,
40
),
TestFile
(
"test_fim_completion.py"
,
40
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_create_kvindices.py"
,
2
),
TestFile
(
"test_hicache.py"
,
6
0
),
TestFile
(
"test_hicache.py"
,
11
6
),
TestFile
(
"test_hicache_mla.py"
,
90
),
TestFile
(
"test_hicache_mla.py"
,
254
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_fused_moe.py"
,
30
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
TestFile
(
"test_triton_moe_channel_fp8_kernel.py"
,
25
),
],
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
TestFile
(
"models/lora/test_lora_tp.py"
,
1
50
),
TestFile
(
"models/lora/test_lora_tp.py"
,
1
16
),
TestFile
(
"test_data_parallelism.py"
,
90
),
TestFile
(
"test_data_parallelism.py"
,
73
),
TestFile
(
"test_dp_attention.py"
,
1
50
),
TestFile
(
"test_dp_attention.py"
,
1
37
),
TestFile
(
"test_mla_tp.py"
,
17
4
),
TestFile
(
"test_mla_tp.py"
,
17
0
),
TestFile
(
"test_moe_ep.py"
,
220
),
TestFile
(
"test_moe_ep.py"
,
181
),
TestFile
(
"test_patch_torch.py"
,
30
),
TestFile
(
"test_patch_torch.py"
,
19
),
TestFile
(
"test_update_weights_from_distributed.py"
,
10
0
),
TestFile
(
"test_update_weights_from_distributed.py"
,
10
3
),
TestFile
(
"test_verl_engine.py"
,
100
),
TestFile
(
"test_verl_engine.py"
,
64
),
],
],
"per-commit-8-gpu"
:
[
"per-commit-8-gpu"
:
[
TestFile
(
"test_local_attn.py"
,
250
),
TestFile
(
"test_local_attn.py"
,
250
),
...
...
test/srt/test_torch_compile_moe.py
View file @
35ca04d2
...
@@ -24,7 +24,7 @@ class TestTorchCompileMoe(CustomTestCase):
...
@@ -24,7 +24,7 @@ class TestTorchCompileMoe(CustomTestCase):
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"
8
"
],
other_args
=
[
"--enable-torch-compile"
,
"--torch-compile-max-bs"
,
"
4
"
],
)
)
@
classmethod
@
classmethod
...
...
test/srt/test_update_weights_from_distributed.py
View file @
35ca04d2
...
@@ -129,7 +129,7 @@ def init_process_hf(
...
@@ -129,7 +129,7 @@ def init_process_hf(
hf_instruct_params
=
[]
hf_instruct_params
=
[]
hf_base_params
=
[]
hf_base_params
=
[]
print
(
"get parameter in hf instruct model and base model"
)
print
(
"
[hf]
get parameter in hf instruct model and base model"
)
for
parameter_name
in
checking_parameters
:
for
parameter_name
in
checking_parameters
:
hf_instruct_params
.
append
(
hf_instruct_params
.
append
(
hf_instruct_model
.
get_parameter
(
parameter_name
)[:
truncate_size
]
hf_instruct_model
.
get_parameter
(
parameter_name
)[:
truncate_size
]
...
@@ -152,10 +152,12 @@ def init_process_hf(
...
@@ -152,10 +152,12 @@ def init_process_hf(
param_queue
.
put
((
"hf_base_params"
,
hf_base_params
))
param_queue
.
put
((
"hf_base_params"
,
hf_base_params
))
# Init weight update group for rank 0 (the training engine in RLHF).
# Init weight update group for rank 0 (the training engine in RLHF).
print
(
f
"rank
{
rank
}
world_size:
{
world_size
}
init custom process group"
)
port
=
60000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
init_method
=
f
"tcp://localhost:
{
port
}
"
print
(
f
"[hf]
{
rank
=
}
{
world_size
=
}
init custom process group.
{
init_method
=
}
"
)
group
=
init_custom_process_group
(
group
=
init_custom_process_group
(
backend
=
"nccl"
,
backend
=
"nccl"
,
init_method
=
"tcp://localhost:65500"
,
init_method
=
init_method
,
world_size
=
world_size
,
world_size
=
world_size
,
rank
=
rank
,
rank
=
rank
,
group_name
=
"test_parameter_update_group"
,
group_name
=
"test_parameter_update_group"
,
...
@@ -184,7 +186,7 @@ def init_process_hf(
...
@@ -184,7 +186,7 @@ def init_process_hf(
# Measure the latency of broadcasting/weights update.
# Measure the latency of broadcasting/weights update.
broadcast_time
=
time_end_broadcast
-
time_begin_broadcast
broadcast_time
=
time_end_broadcast
-
time_begin_broadcast
print
(
f
"
rank
{
rank
}
broadcast parameter time:
{
broadcast_time
:.
3
f
}
s"
)
print
(
f
"
[hf]
{
rank
=
}
{
broadcast_time
=
:.
3
f
}
s"
)
param_queue
.
put
((
"broadcast_time"
,
broadcast_time
))
param_queue
.
put
((
"broadcast_time"
,
broadcast_time
))
# Delete the huggingface models to free up memory.
# Delete the huggingface models to free up memory.
...
@@ -210,17 +212,21 @@ def init_process_sgl(
...
@@ -210,17 +212,21 @@ def init_process_sgl(
torch
.
cuda
.
synchronize
()
torch
.
cuda
.
synchronize
()
base_gpu_id
=
1
if
rank
==
1
else
1
+
tp_size
base_gpu_id
=
1
if
rank
==
1
else
1
+
tp_size
if
backend
==
"Engine"
:
if
backend
==
"Engine"
:
print
(
f
"[sgl] rank
{
rank
}
init engine"
)
engine
=
sgl
.
Engine
(
engine
=
sgl
.
Engine
(
model_path
=
model_name
,
model_path
=
model_name
,
random_seed
=
42
,
base_gpu_id
=
base_gpu_id
,
base_gpu_id
=
base_gpu_id
,
tp_size
=
tp_size
,
tp_size
=
tp_size
,
cuda_graph_max_bs
=
2
,
)
)
else
:
else
:
if
rank
==
1
:
if
rank
==
1
:
url
=
DEFAULT_URL_FOR_TEST
url
=
DEFAULT_URL_FOR_TEST
else
:
else
:
url
=
DEFAULT_URL_FOR_TEST
.
replace
(
"2157"
,
"2159"
)
host
,
port
=
DEFAULT_URL_FOR_TEST
.
split
(
":"
)
url
=
":"
.
join
(
host
,
str
(
int
(
port
)
+
10000
))
print
(
f
"[sgl] rank
{
rank
}
init server on url:
{
url
}
"
)
process
=
popen_launch_server
(
process
=
popen_launch_server
(
model_name
,
model_name
,
url
,
url
,
...
@@ -230,13 +236,11 @@ def init_process_sgl(
...
@@ -230,13 +236,11 @@ def init_process_sgl(
str
(
base_gpu_id
),
str
(
base_gpu_id
),
"--tp-size"
,
"--tp-size"
,
str
(
tp_size
),
str
(
tp_size
),
"--cuda-graph-max-bs"
,
2
,
),
),
)
)
torch
.
cuda
.
synchronize
()
torch
.
cuda
.
synchronize
()
if
backend
==
"Engine"
:
print
(
f
"rank
{
rank
}
init engine"
)
else
:
print
(
f
"rank
{
rank
}
init server on url:
{
url
}
"
)
# Get weights of instruct model, i.e. pre-training weights.
# Get weights of instruct model, i.e. pre-training weights.
instruct_params
=
[]
instruct_params
=
[]
...
@@ -252,11 +256,13 @@ def init_process_sgl(
...
@@ -252,11 +256,13 @@ def init_process_sgl(
param_queue
.
put
((
f
"sgl_dp_
{
rank
}
_instruct_params"
,
instruct_params
))
param_queue
.
put
((
f
"sgl_dp_
{
rank
}
_instruct_params"
,
instruct_params
))
port
=
60000
+
int
(
os
.
environ
.
get
(
"CUDA_VISIBLE_DEVICES"
,
"0"
)[
0
])
*
100
# Init weight update group with the training engine.
# Init weight update group with the training engine.
if
backend
==
"Engine"
:
if
backend
==
"Engine"
:
engine
.
init_weights_update_group
(
engine
.
init_weights_update_group
(
master_address
=
"localhost"
,
master_address
=
"localhost"
,
master_port
=
"65500"
,
master_port
=
str
(
port
)
,
rank_offset
=
base_gpu_id
,
rank_offset
=
base_gpu_id
,
world_size
=
world_size
,
world_size
=
world_size
,
group_name
=
"test_parameter_update_group"
,
group_name
=
"test_parameter_update_group"
,
...
@@ -267,7 +273,7 @@ def init_process_sgl(
...
@@ -267,7 +273,7 @@ def init_process_sgl(
f
"
{
url
}
/init_weights_update_group"
,
f
"
{
url
}
/init_weights_update_group"
,
json
=
{
json
=
{
"master_address"
:
"localhost"
,
"master_address"
:
"localhost"
,
"master_port"
:
"65500"
,
"master_port"
:
str
(
port
)
,
"rank_offset"
:
base_gpu_id
,
"rank_offset"
:
base_gpu_id
,
"world_size"
:
world_size
,
"world_size"
:
world_size
,
"group_name"
:
"test_parameter_update_group"
,
"group_name"
:
"test_parameter_update_group"
,
...
@@ -311,7 +317,7 @@ def init_process_sgl(
...
@@ -311,7 +317,7 @@ def init_process_sgl(
# Measure the latency of broadcast/weights update.
# Measure the latency of broadcast/weights update.
update_time
=
time_end_update
-
time_begin_update
update_time
=
time_end_update
-
time_begin_update
print
(
print
(
f
"fully update model_name
{
model_name
}
rank
{
rank
}
parameter from distributed time:
{
update_time
:.
3
f
}
s"
f
"
[sgl]
fully update model_name
{
model_name
}
rank
{
rank
}
parameter from distributed time:
{
update_time
:.
3
f
}
s"
)
)
param_queue
.
put
((
f
"update_sgl_dp_
{
rank
}
_time"
,
update_time
))
param_queue
.
put
((
f
"update_sgl_dp_
{
rank
}
_time"
,
update_time
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment