Unverified Commit 512a0d4d authored by Olatunji Ruwase's avatar Olatunji Ruwase Committed by GitHub
Browse files

Fix index out of range error when parameter count is not multiple of ranks (#202)

parent c014a55b
......@@ -264,6 +264,13 @@ class FP16_DeepSpeedZeroOptimizer(object):
flat_tensor_list = []
current_size = 0
if not tensor_list:
flat_tensor_list.append(
torch.zeros(int(partition_size),
dtype=dtype,
device=torch.cuda.current_device()))
return _flatten_dense_tensors(flat_tensor_list)
if dtype is None:
dtype = tensor_list[0].dtype
......
......@@ -315,3 +315,33 @@ def test_zero_allow_untested_optimizer(tmpdir):
model_parameters=model.parameters())
_test_zero_allow_untested_optimizer(args)
def test_zero_empty_partition(tmpdir):
    """Verify ZeRO tolerates ranks that receive an empty parameter partition.

    With a data-parallel world size of 3 and a model holding only 2
    parameters, at least one rank gets no parameters; this exercises the
    empty-partition path in the ZeRO optimizer.
    """
    config = {
        "train_batch_size": 3,
        "fp16": {
            "enabled": True
        },
        "optimizer": {
            "type": "Adam",
            "params": {
                "lr": 0.00015
            }
        },
        "zero_optimization": True
    }
    args = args_from_dict(tmpdir, config)

    @distributed_test(world_size=[3])
    def _run(args):
        hidden_dim = 1
        net = SimpleModel(hidden_dim)
        # Exactly 2 parameters with DP=3 guarantees an empty partition on
        # at least one rank.
        assert len(list(net.parameters())) == 2
        engine, _, _, _ = deepspeed.initialize(
            args=args,
            model=net,
            model_parameters=net.parameters())
        engine.step()

    _run(args)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment