Unverified commit 4b048a87, authored by ver217 and committed by GitHub
Browse files

fix prepare grads in sharded optim (#749)

parent 09777254
...@@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer): ...@@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
def _prepare_grads(self): def _prepare_grads(self):
for group in self.optim.param_groups: for group in self.optim.param_groups:
for p in group['params']: for p in group['params']:
if p.colo_attr.saved_grad.is_null():
continue
p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE) p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE)
# FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information # FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information
# If we change p.grad directly # If we change p.grad directly
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment