Commit b1d4bd73 (unverified), authored by Jeff Rasley, committed by GitHub
Browse files

fix for 16GB v100 nodes (#393)

parent 2dea61f2
......@@ -1585,6 +1585,7 @@ class FP16_DeepSpeedZeroOptimizer(object):
if self.cpu_offload:
torch.cuda.current_stream().wait_stream(self.migration_stream)
# TODO: we need to revisit this and remove the magic 4.5x multiplier here
if self.contiguous_gradients:
self.ipg_buffer = []
buf_0 = torch.empty(int(self.reduce_bucket_size * 4.5),
......
......@@ -445,7 +445,9 @@ def test_zero_empty_partition(tmpdir, zero_stage, use_cpu_offload):
},
"zero_optimization": {
"stage": zero_stage,
- "cpu_offload": use_cpu_offload
+ "cpu_offload": use_cpu_offload,
+ "reduce_bucket_size": 100,
+ "allgather_bucket_size": 100
}
}
args = args_from_dict(tmpdir, config_dict)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment