Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2a5f0100
"vscode:/vscode.git/clone" did not exist on "2ed3989cba9df9ba63ade79155a87051d81facb5"
Unverified
Commit
2a5f0100
authored
Jun 10, 2025
by
Baizhou Zhang
Committed by
GitHub
Jun 10, 2025
Browse files
Fix GGuf and add back test_gguf.py (#7067)
parent
dbdf76ca
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
6 deletions
+9
-6
python/sglang/srt/layers/linear.py
python/sglang/srt/layers/linear.py
+0
-4
python/sglang/srt/model_loader/loader.py
python/sglang/srt/model_loader/loader.py
+8
-1
test/srt/run_suite.py
test/srt/run_suite.py
+1
-1
No files found.
python/sglang/srt/layers/linear.py
View file @
2a5f0100
...
...
@@ -546,8 +546,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
param
.
shard_id
.
append
(
loaded_shard_id
)
param
.
shard_id_map
[
loaded_shard_id
]
=
len
(
param
.
data_container
)
param
.
data_container
.
append
(
loaded_weight
)
if
len
(
param
.
data_container
)
==
2
:
self
.
qweight
=
param
.
materialize_nested
()
return
param_data
=
param
.
data
...
...
@@ -961,8 +959,6 @@ class QKVParallelLinear(ColumnParallelLinear):
param
.
shard_id
.
append
(
loaded_shard_id
)
param
.
shard_id_map
[
loaded_shard_id
]
=
len
(
param
.
data_container
)
param
.
data_container
.
append
(
loaded_weight
)
if
len
(
param
.
data_container
)
==
3
:
self
.
qweight
=
param
.
materialize_nested
()
return
param_data
=
param
.
data
...
...
python/sglang/srt/model_loader/loader.py
View file @
2a5f0100
...
...
@@ -1259,12 +1259,19 @@ class GGUFModelLoader(BaseModelLoader):
):
model_config
.
hf_config
.
update
({
"tie_word_embeddings"
:
True
})
target_device
=
torch
.
device
(
device_config
.
device
)
with
set_default_torch_dtype
(
model_config
.
dtype
):
with
t
orch
.
device
(
device_config
.
device
)
:
with
t
arget_
device
:
model
=
_initialize_model
(
model_config
,
self
.
load_config
)
model
.
load_weights
(
self
.
_get_weights_iterator
(
local_model_path
,
gguf_weights_map
)
)
for
_
,
module
in
model
.
named_modules
():
quant_method
=
getattr
(
module
,
"quant_method"
,
None
)
if
quant_method
is
not
None
:
with
device_loading_context
(
module
,
target_device
):
quant_method
.
process_weights_after_loading
(
module
)
return
model
...
...
test/srt/run_suite.py
View file @
2a5f0100
...
...
@@ -186,7 +186,7 @@ suites = {
"vllm_dependency_test"
:
[
TestFile
(
"test_awq.py"
),
TestFile
(
"test_bnb.py"
),
#
TestFile("test_gguf.py", 78),
# TODO: Fix GGuf after updating to torch 2.7 and vllm 0.9
TestFile
(
"test_gguf.py"
,
78
),
TestFile
(
"test_gptqmodel_dynamic.py"
,
72
),
TestFile
(
"test_vllm_dependency.py"
),
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment