"container/deps/vscode:/vscode.git/clone" did not exist on "e3857ba0d6228db8309f315e0598ff9d578a53ae"
Unverified Commit c9e8ce67 authored by ver217, committed by GitHub
Browse files

fix move fp32 shards (#1604)

parent eac1b793
@@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
         fp32_shards_used_cuda_margin_mem = 0
         for group in self.optim.param_groups:
             for p in group['params']:
+                if p.colo_attr.saved_grad.is_null():
+                    continue
                 shard_mem = self.master_params[p].payload.numel() * self.master_params[p].payload.element_size()
                 if fp32_shards_used_cuda_margin_mem + shard_mem < fp32_shards_available_cuda_margin_mem:
                     colo_model_data_tensor_move_inline(self.master_params[p], torch.cuda.current_device())
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment