Unverified commit ce32c69c, authored by Alex Hedges, committed by GitHub
Browse files

Use `config_dict_or_path` for deepspeed.zero.Init (#13614)

parent 0eb02871
...@@ -1340,7 +1340,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix ...@@ -1340,7 +1340,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model") logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")
# this immediately partitions the model across all gpus, to avoid the overhead in time # this immediately partitions the model across all gpus, to avoid the overhead in time
# and memory copying it on CPU or each GPU first # and memory copying it on CPU or each GPU first
with deepspeed.zero.Init(config=deepspeed_config()): with deepspeed.zero.Init(config_dict_or_path=deepspeed_config()):
with no_init_weights(_enable=_fast_init): with no_init_weights(_enable=_fast_init):
model = cls(config, *model_args, **model_kwargs) model = cls(config, *model_args, **model_kwargs)
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment