Commit 6065b946 authored by chenych

Update 0427

parent 2369eb2b
@@ -76,10 +76,10 @@ class FSDPVLLMShardingManager(BaseShardingManager):
         actor_weights = get_model_state_dict(self.module)
         print_gpu_memory_usage("After state_dict() in sharding manager")
-        if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
-            self.inference_engine.wake_up(tags=["weights"])
-        else:
-            self.inference_engine.wake_up()
+        # if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+        #     self.inference_engine.wake_up(tags=["weights"])
+        # else:
+        #     self.inference_engine.wake_up()
         model = self.inference_engine.llm_engine.model_executor.driver_worker.worker.model_runner.model
         model.load_weights(self._make_weight_iterator(actor_weights))
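The block being disabled above is a small API-compatibility shim: recent vLLM versions let `wake_up()` restore selected buffers via a `tags` keyword, while older versions take no arguments, so the code probes the method signature at runtime. A minimal, self-contained sketch of that pattern (the `engine` argument and `wake_up_compat` helper are hypothetical, standing in for `self.inference_engine` and the inline logic):

```python
import inspect

def wake_up_compat(engine):
    # Probe the running vLLM version's wake_up() signature: newer
    # releases accept wake_up(tags=["weights"]) to restore only the
    # weight buffers, while older releases take no arguments.
    if "tags" in inspect.signature(engine.wake_up).parameters:
        engine.wake_up(tags=["weights"])
    else:
        engine.wake_up()
```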
@@ -101,8 +101,9 @@ class FSDPVLLMShardingManager(BaseShardingManager):
         print_gpu_memory_usage("Before vllm offload in sharding manager")
         free_bytes_before_sleep = torch.cuda.mem_get_info()[0]
         # self.inference_engine.sleep(level=1)
         ## rocm
+        ## TODO DCU: how to free GPU memory here
         # self.inference_engine.offload_model_weights()
         free_bytes_after_sleep = torch.cuda.mem_get_info()[0]
         self.freed_bytes = free_bytes_after_sleep - free_bytes_before_sleep
         print_gpu_memory_usage("After vllm offload in sharding manager")