sglang / Commits / a9e90b4b

Commit a9e90b4b (unverified), authored Nov 17, 2024 by Lianmin Zheng; committed by GitHub, Nov 17, 2024
Parent: 8c280cee

[Minor] Fix styles for overlap mode (#2068)

Showing 4 changed files with 8 additions and 17 deletions (+8, -17)
python/sglang/srt/managers/scheduler.py                  +2   -2
python/sglang/srt/managers/tp_worker_overlap_thread.py   +1   -10
python/sglang/srt/model_executor/model_runner.py         +0   -4
test/srt/test_triton_attention_backend.py                +5   -1
python/sglang/srt/managers/scheduler.py

@@ -1002,7 +1002,7 @@ class Scheduler:
             if req.is_retracted:
                 continue
-            if self.server_args.enable_overlap_schedule and (req.finished()):
+            if self.enable_overlap and req.finished():
                 self.token_to_kv_pool.free(batch.out_cache_loc[i : i + 1])
                 continue

@@ -1319,7 +1319,7 @@ def run_scheduler_process(
     try:
         scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
         pipe_writer.send("ready")
-        if server_args.enable_overlap_schedule:
+        if scheduler.enable_overlap:
             scheduler.event_loop_overlap()
         else:
             scheduler.event_loop_normal()
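Both hunks replace reads of server_args.enable_overlap_schedule with the enable_overlap attribute on the Scheduler instance. A minimal sketch of that pattern, assuming the constructor caches the flag; the exact place sglang sets self.enable_overlap is not shown in this diff, and the classes below are toy stand-ins, not the real code:

from types import SimpleNamespace


class Scheduler:
    """Toy stand-in for sglang's Scheduler, for illustration only."""

    def __init__(self, server_args):
        self.server_args = server_args
        # Assumed: the flag is cached once here so call sites can test
        # `self.enable_overlap` instead of re-reading server_args.
        self.enable_overlap = server_args.enable_overlap_schedule

    def event_loop_overlap(self):
        print("overlap event loop")

    def event_loop_normal(self):
        print("normal event loop")


def run_scheduler_process(server_args):
    scheduler = Scheduler(server_args)
    # Mirrors the "+" side of the second hunk: branch on the instance attribute.
    if scheduler.enable_overlap:
        scheduler.event_loop_overlap()
    else:
        scheduler.event_loop_normal()


run_scheduler_process(SimpleNamespace(enable_overlap_schedule=True))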
python/sglang/srt/managers/tp_worker_overlap_thread.py

@@ -26,7 +26,6 @@ import torch
 from sglang.srt.managers.io_struct import UpdateWeightReqInput
 from sglang.srt.managers.schedule_batch import ModelWorkerBatch
 from sglang.srt.managers.tp_worker import TpModelWorker
-from sglang.srt.model_executor.forward_batch_info import ForwardBatch
 from sglang.srt.server_args import ServerArgs

 logger = logging.getLogger(__name__)

@@ -176,16 +175,8 @@ class TpModelWorkerClient:
         ) % self.future_token_ids_limit
         return None, future_next_token_ids

-    def forward_batch_embedding(self, model_worker_batch: ModelWorkerBatch):
-        forward_batch = ForwardBatch.init_new(model_worker_batch, self.model_runner)
-        logits_output = self.model_runner.forward(forward_batch)
-        embeddings = logits_output.embeddings
-        return embeddings
-
     def update_weights(self, recv_req: UpdateWeightReqInput):
-        success, message = self.model_runner.update_weights(
-            recv_req.model_path, recv_req.load_format
-        )
+        success, message = self.worker.update_weights(recv_req)
        return success, message

     def __delete__(self):
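The unused forward_batch_embedding helper and its ForwardBatch import are dropped, and update_weights now delegates to the wrapped worker with the whole request object instead of calling the model runner with unpacked fields. A hedged sketch of that delegation, using stand-in classes rather than sglang's real TpModelWorker/TpModelWorkerClient:

from types import SimpleNamespace


class FakeWorker:
    """Stand-in for TpModelWorker; only the update_weights signature matters here."""

    def update_weights(self, recv_req):
        # Illustrative only: a real worker would reload weights from
        # recv_req.model_path using recv_req.load_format.
        return True, f"updated from {recv_req.model_path} ({recv_req.load_format})"


class WorkerClient:
    """Stand-in for TpModelWorkerClient after this commit."""

    def __init__(self, worker):
        self.worker = worker

    def update_weights(self, recv_req):
        # Mirrors the "+" line: forward the request object to the worker,
        # which owns the model runner, rather than unpacking fields here.
        success, message = self.worker.update_weights(recv_req)
        return success, message


client = WorkerClient(FakeWorker())
print(client.update_weights(SimpleNamespace(model_path="my-model", load_format="auto")))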
python/sglang/srt/model_executor/model_runner.py

@@ -276,10 +276,6 @@ class ModelRunner:
             else None
         )
         self.dtype = self.vllm_model_config.dtype
-        if self.sliding_window_size:
-            assert (
-                self.server_args.attention_backend == "flashinfer"
-            ), "Only flashinfer supports window attention."

         logger.info(
             f"Load weight end. "
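The removed block asserted that models with a sliding window could only be served with the flashinfer attention backend; after this commit, ModelRunner no longer rejects that combination at this point. For reference, a standalone paraphrase of the dropped guard, written as a hypothetical helper rather than sglang API:

def check_window_attention_support(attention_backend: str, sliding_window_size) -> None:
    """Hypothetical helper reproducing the removed guard clause."""
    if sliding_window_size:
        # This is the assertion the diff deletes from ModelRunner.
        assert (
            attention_backend == "flashinfer"
        ), "Only flashinfer supports window attention."


# Under the old guard, a non-flashinfer backend plus a sliding window would
# trip the assertion; flashinfer passes.
check_window_attention_support("flashinfer", sliding_window_size=4096)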
test/srt/test_triton_attention_backend.py

-import subprocess
+"""
+Usage:
+python3 -m unittest test_triton_attention_backend.TestTritonAttnBackend.test_mmlu
+"""
+
 import unittest
 from types import SimpleNamespace
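The unused subprocess import is replaced by a module docstring documenting how to run the test. A minimal sketch of a test module following the same convention; the class and method names come from the docstring, but the body below is a placeholder, not sglang's real test:

"""
Usage:
python3 -m unittest test_triton_attention_backend.TestTritonAttnBackend.test_mmlu
"""

import unittest
from types import SimpleNamespace


class TestTritonAttnBackend(unittest.TestCase):
    def test_mmlu(self):
        # Placeholder body; the real test's contents are not part of this diff.
        args = SimpleNamespace(attention_backend="triton")
        self.assertEqual(args.attention_backend, "triton")


if __name__ == "__main__":
    unittest.main()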