polish code

63469c0f · ver217 · 54fd37f0 · 63469c0f
Commit 63469c0f authored Mar 14, 2022 by ver217
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 0 deletions

colossalai/zero/shard_utils/bucket_tensor_shard_strategy.py +3 -0

No files found.
--- a/colossalai/zero/shard_utils/bucket_tensor_shard_strategy.py
+++ b/colossalai/zero/shard_utils/bucket_tensor_shard_strategy.py
@@ -23,6 +23,9 @@ class BucketTensorShardStrategy(TensorShardStrategy):
        for i in range(self.world_size):
            if i == self.local_rank:
                buffer_list.append(flatten([t.payload for t in tensor_list]).cuda(get_current_device()))
+                # Release payload here, to decrease peak memory usage
+                for t in tensor_list:
+                    t.reset_payload(None)
            else:
                buffer_list.append(torch.zeros(buffer_size, dtype=dtype, device=get_current_device()))
        dist.all_gather(buffer_list, buffer_list[self.local_rank], group=self.process_group)