Commit 82699474 (Unverified)
Authored Dec 16, 2024 by Jerry Zhang; committed via GitHub on Dec 16, 2024

Small fixes for torchao quant (#2476)
Parent: 7154b4b1

Changes: 2 changed files with 6 additions and 5 deletions

  python/sglang/srt/layers/torchao_utils.py          +2 -1
  python/sglang/srt/model_executor/model_runner.py   +4 -4
--- a/python/sglang/srt/layers/torchao_utils.py
+++ b/python/sglang/srt/layers/torchao_utils.py
@@ -26,11 +26,12 @@ def apply_torchao_config_to_model(
         quantize_,
     )
     from torchao.quantization.observer import PerRow, PerTensor
+    from torchao.quantization.quant_api import _is_linear
 
     if filter_fn is None:
 
         def filter_fn(module, fqn):
-            return "proj" in fqn
+            return _is_linear(module) and "proj" in fqn
 
     if torchao_config == "" or torchao_config is None:
         return model
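The old default filter selected any module whose fully qualified name contained "proj", which could also match non-linear modules; gating on torchao's _is_linear restricts quantization to linear layers. A minimal sketch of the effect, where ToyBlock and its submodule names are hypothetical and int8_weight_only is used purely as an example config:

# Sketch only: ToyBlock, up_proj, and proj_norm are hypothetical names.
import torch.nn as nn
from torchao.quantization import int8_weight_only, quantize_
from torchao.quantization.quant_api import _is_linear

class ToyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.up_proj = nn.Linear(16, 16)   # Linear whose name contains "proj"
        self.proj_norm = nn.LayerNorm(16)  # non-Linear whose name also contains "proj"

def filter_fn(module, fqn):
    # The old filter ('"proj" in fqn') would also select proj_norm;
    # _is_linear(module) limits quantization to Linear layers only.
    return _is_linear(module) and "proj" in fqn

quantize_(ToyBlock(), int8_weight_only(), filter_fn=filter_fn)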
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -157,6 +157,10 @@ class ModelRunner:
         self.sampler = Sampler()
         self.load_model()
 
+        apply_torchao_config_to_model(
+            self.model, global_server_args_dict["torchao_config"]
+        )
+
         # Apply torch TP if the model supports it
         supports_torch_tp = getattr(self.model, "supports_torch_tp", False)
         if self.tp_size > 1 and supports_torch_tp:
@@ -165,10 +169,6 @@ class ModelRunner:
         else:
             self.torch_tp_applied = False
 
-        apply_torchao_config_to_model(
-            self.model, global_server_args_dict["torchao_config"]
-        )
-
         # Init memory pool and attention backends
         if server_args.lora_paths is not None:
             self.init_lora_manager()
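This second change moves the apply_torchao_config_to_model call from after the torch tensor-parallel (TP) branch to immediately after self.load_model(), so quantization is applied to the freshly loaded weights before any TP rewrite of the model. A condensed sketch of the resulting initialization order; only the lines shown in the diff are verbatim, and helper names outside the diff (such as apply_torch_tp) are assumptions:

# Condensed sketch of ModelRunner.__init__ after this commit.
self.sampler = Sampler()
self.load_model()

# Quantize first, on the plain loaded weights...
apply_torchao_config_to_model(
    self.model, global_server_args_dict["torchao_config"]
)

# ...then (possibly) rewrite the model for torch tensor parallelism.
supports_torch_tp = getattr(self.model, "supports_torch_tp", False)
if self.tp_size > 1 and supports_torch_tp:
    self.apply_torch_tp()  # assumed helper name
    self.torch_tp_applied = True
else:
    self.torch_tp_applied = False

In sglang, the torchao config string is typically supplied via the server's --torchao-config argument (for example, a value like int8wo).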