Unverified Commit 8711c706 authored by Jiarui Fang's avatar Jiarui Fang Committed by GitHub
Browse files

[hotfix] fix grad offload when enabling reuse_fp16_shard

parents 4c4388c4 f1fa1a67
...@@ -299,6 +299,9 @@ class ShardedOptimizerV2(ColossalaiOptimizer): ...@@ -299,6 +299,9 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
if p.colo_attr.saved_grad.is_null(): if p.colo_attr.saved_grad.is_null():
continue continue
p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE) p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE)
# If reuse_fp16_shard is enabled, an fp16 grad that wasn't offloaded may have been evicted to CPU
if not p.colo_attr.offload_grad:
colo_model_data_tensor_move_inline(p.colo_attr.grad_payload, torch.cuda.current_device())
# FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information # FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information
# If we change p.grad directly # If we change p.grad directly
# it may raise error because of different shape/dtype/device of p.data and p.grad # it may raise error because of different shape/dtype/device of p.data and p.grad
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment