Unverified Commit d3bfcbf5 authored by Benjamin Lefaudeux, committed by GitHub

[fix][SDP] Lightning-compat: deactivating buckets for a single rank, not useful (#514)

* Deactivating buckets for a single rank: they do not crash, but they are not useful
parent d217278c
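The change below targets the Lightning-compat case where ShardedDDP wraps a model but only a single rank exists. A minimal sketch of that setup, assuming the usual OSS + ShardedDataParallel wrapping; the helper name and hyper-parameters are illustrative and not part of the commit:

import tempfile

import torch
import torch.distributed as dist
from fairscale.nn.data_parallel import ShardedDataParallel
from fairscale.optim import OSS


def single_rank_sharded_ddp():
    # Single-process "distributed" init, as a Lightning single-GPU run would do.
    dist.init_process_group(
        backend="gloo", init_method=f"file://{tempfile.mkstemp()[1]}", rank=0, world_size=1
    )

    model = torch.nn.Linear(32, 32)
    optimizer = OSS(model.parameters(), optim=torch.optim.SGD, lr=0.1)

    # With a world size of 1 there is no cross-rank reduction, so gradient
    # buckets would never be used; the commit zeroes the bucket budget instead.
    ddp_model = ShardedDataParallel(model, optimizer)

    loss = ddp_model(torch.randn(8, 32)).sum()
    loss.backward()
    optimizer.step()
    dist.destroy_process_group()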
@@ -160,6 +160,11 @@ class ShardedDataParallel(nn.Module):
         # - setup buckets and tensor views
         model_size = sum([p.numel() for p in self.module.parameters()])
         self.buffer_max_size = min(reduce_buffer_size, model_size)
+        if dist.get_world_size(self.process_group) == 1:
+            self.buffer_max_size = 0
+            logging.info("Training is not really distributed, single rank. Deactivating buckets")
         logging.info(
             "ShardedDDP bucket size: {:.2f}M parameters, model size {:.2f}M parameters".format(
                 self.buffer_max_size / 2 ** 20, model_size / 2 ** 20
......
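Read in isolation, the gating above amounts to the following rule; pick_buffer_size is a hypothetical helper used only to illustrate it, not a fairscale function:

def pick_buffer_size(reduce_buffer_size: int, model_size: int, world_size: int) -> int:
    # The reduce-bucket budget is capped by the model size...
    buffer_max_size = min(reduce_buffer_size, model_size)
    # ...and zeroed out when only one rank is present, since there is no
    # cross-rank reduction for the buckets to amortize.
    if world_size == 1:
        buffer_max_size = 0
    return buffer_max_size


assert pick_buffer_size(2 ** 23, 10 ** 6, world_size=2) == 10 ** 6
assert pick_buffer_size(2 ** 23, 10 ** 6, world_size=1) == 0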
@@ -408,9 +408,9 @@ def run_test_gpt2(rank, world_size, backend, device, temp_file_name):
 @skip_if_no_cuda
 @skip_if_single_gpu
-def test_gpt2():
-    # Check that the ShardedDDP wrapper accepts tuple(tensors) as inputs
-    world_size = 2
+@pytest.mark.parametrize("world_size", [1, 2])
+def test_gpt2(world_size):
+    # Check that having trainable unused params is fine
     backend = "gloo"
     temp_file_name = tempfile.mkstemp()[1]
     device = "cuda"
......
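For the test change, a sketch of how the parametrized world_size typically drives the worker spawn, assuming the usual fairscale test pattern; run_test_gpt2 is the worker whose signature appears in the hunk above, and the spawn boilerplate here is an illustration, not the verbatim test body:

import tempfile

import pytest
import torch.multiprocessing as mp


@pytest.mark.parametrize("world_size", [1, 2])
def test_gpt2_sketch(world_size):
    backend = "gloo"
    temp_file_name = tempfile.mkstemp()[1]
    device = "cuda"
    # One process per rank; world_size == 1 now exercises the "buckets deactivated" path.
    # run_test_gpt2 is assumed to be the worker defined earlier in the test module.
    mp.spawn(
        run_test_gpt2,
        args=(world_size, backend, device, temp_file_name),
        nprocs=world_size,
        join=True,
    )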