Unverified commit 512a0d4d, authored by Olatunji Ruwase and committed by GitHub
Browse files

Fix index out of range error when parameter count is not multiple of ranks (#202)

parent c014a55b
...@@ -264,6 +264,13 @@ class FP16_DeepSpeedZeroOptimizer(object): ...@@ -264,6 +264,13 @@ class FP16_DeepSpeedZeroOptimizer(object):
flat_tensor_list = [] flat_tensor_list = []
current_size = 0 current_size = 0
if not tensor_list:
flat_tensor_list.append(
torch.zeros(int(partition_size),
dtype=dtype,
device=torch.cuda.current_device()))
return _flatten_dense_tensors(flat_tensor_list)
if dtype is None: if dtype is None:
dtype = tensor_list[0].dtype dtype = tensor_list[0].dtype
......
...@@ -315,3 +315,33 @@ def test_zero_allow_untested_optimizer(tmpdir): ...@@ -315,3 +315,33 @@ def test_zero_allow_untested_optimizer(tmpdir):
model_parameters=model.parameters()) model_parameters=model.parameters())
_test_zero_allow_untested_optimizer(args) _test_zero_allow_untested_optimizer(args)
def test_zero_empty_partition(tmpdir):
    """Run one optimizer step with ZeRO when there are fewer parameters than ranks.

    The job is launched on three ranks with a model that has only two
    parameters, so the parameter count is not a multiple of the rank count
    and at least one partition ends up empty.
    """
    adam_settings = {"type": "Adam", "params": {"lr": 0.00015}}
    zero_fp16_config = {
        "train_batch_size": 3,
        "fp16": {"enabled": True},
        "optimizer": adam_settings,
        "zero_optimization": True,
    }
    launch_args = args_from_dict(tmpdir, zero_fp16_config)

    @distributed_test(world_size=[3])
    def _test_zero_empty_partition(args):
        net = SimpleModel(1)

        # Two parameters spread over DP=3 ranks leaves an empty partition,
        # which is the situation this test is meant to exercise.
        param_count = len(list(net.parameters()))
        assert param_count == 2

        engine, _, _, _ = deepspeed.initialize(
            args=args,
            model=net,
            model_parameters=net.parameters(),
        )
        engine.step()

    _test_zero_empty_partition(launch_args)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment