Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
EasyR1
Commits
6065b946
"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "df7c4c19b4b93ec5b7ba9f4e227f802f8d82a246"
Commit
6065b946
authored
Apr 27, 2025
by
chenych
Browse files
Update 0427
parent
2369eb2b
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
verl/workers/sharding_manager/fsdp_vllm.py
verl/workers/sharding_manager/fsdp_vllm.py
+6
-5
No files found.
verl/workers/sharding_manager/fsdp_vllm.py
View file @
6065b946
...
@@ -76,10 +76,10 @@ class FSDPVLLMShardingManager(BaseShardingManager):
...
@@ -76,10 +76,10 @@ class FSDPVLLMShardingManager(BaseShardingManager):
actor_weights
=
get_model_state_dict
(
self
.
module
)
actor_weights
=
get_model_state_dict
(
self
.
module
)
print_gpu_memory_usage
(
"After state_dict() in sharding manager"
)
print_gpu_memory_usage
(
"After state_dict() in sharding manager"
)
if
"tags"
in
inspect
.
signature
(
self
.
inference_engine
.
wake_up
).
parameters
:
#
if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
self
.
inference_engine
.
wake_up
(
tags
=
[
"weights"
])
#
self.inference_engine.wake_up(tags=["weights"])
else
:
#
else:
self
.
inference_engine
.
wake_up
()
#
self.inference_engine.wake_up()
model
=
self
.
inference_engine
.
llm_engine
.
model_executor
.
driver_worker
.
worker
.
model_runner
.
model
model
=
self
.
inference_engine
.
llm_engine
.
model_executor
.
driver_worker
.
worker
.
model_runner
.
model
model
.
load_weights
(
self
.
_make_weight_iterator
(
actor_weights
))
model
.
load_weights
(
self
.
_make_weight_iterator
(
actor_weights
))
...
@@ -101,8 +101,9 @@ class FSDPVLLMShardingManager(BaseShardingManager):
...
@@ -101,8 +101,9 @@ class FSDPVLLMShardingManager(BaseShardingManager):
print_gpu_memory_usage
(
"Before vllm offload in sharding manager"
)
print_gpu_memory_usage
(
"Before vllm offload in sharding manager"
)
free_bytes_before_sleep
=
torch
.
cuda
.
mem_get_info
()[
0
]
free_bytes_before_sleep
=
torch
.
cuda
.
mem_get_info
()[
0
]
# self.inference_engine.sleep(level=1)
# self.inference_engine.sleep(level=1)
##
rocm
##
TODO(DCU): how to free GPU memory on this platform
# self.inference_engine.offload_model_weights()
# self.inference_engine.offload_model_weights()
free_bytes_after_sleep
=
torch
.
cuda
.
mem_get_info
()[
0
]
free_bytes_after_sleep
=
torch
.
cuda
.
mem_get_info
()[
0
]
self
.
freed_bytes
=
free_bytes_after_sleep
-
free_bytes_before_sleep
self
.
freed_bytes
=
free_bytes_after_sleep
-
free_bytes_before_sleep
print_gpu_memory_usage
(
"After vllm offload in sharding manager"
)
print_gpu_memory_usage
(
"After vllm offload in sharding manager"
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment