Unverified Commit e52f1cb6 authored by Serizao, committed by GitHub

Update training_args.py - addition of self.distributed_state when using XPU (#25999)



* Update training_args.py

The distributed state was missing, so lines 1813-1814 failed because the value was undefined.

* Update training_args.py
Co-authored-by: Zach Mueller <muellerzr@gmail.com>

---------
Co-authored-by: Zach Mueller <muellerzr@gmail.com>
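
For context, a minimal sketch of the failure mode this patch fixes. The class and method below are hypothetical stand-ins for `TrainingArguments._setup_devices`; only the missing-attribute pattern comes from this patch, the rest is illustrative:

```python
# Hypothetical stand-in for TrainingArguments: before the patch, the XPU
# branch configured the device but never assigned self.distributed_state,
# so a later read of that attribute raised an error.
class ArgsBeforePatch:
    def _setup_devices(self):
        # XPU branch: device configured, distributed_state never set
        self.device_name = "xpu:0"


class ArgsAfterPatch:
    def _setup_devices(self):
        self.device_name = "xpu:0"
        # the fix: create the state object in this branch as well
        self.distributed_state = object()  # stands in for accelerate.PartialState


args = ArgsBeforePatch()
args._setup_devices()
try:
    _ = args.distributed_state  # the read around lines 1813-1814 that failed
except AttributeError as exc:
    print("before patch:", exc)

args = ArgsAfterPatch()
args._setup_devices()
print("after patch:", args.distributed_state is not None)
```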
parent 0fced067
@@ -1803,6 +1803,7 @@ class TrainingArguments:
             torch.cuda.set_device(device)
         elif is_torch_xpu_available() and "ACCELERATE_USE_XPU" not in os.environ:
             os.environ["ACCELERATE_USE_XPU"] = "true"
+            self.distributed_state = PartialState(timeout=timedelta(seconds=self.ddp_timeout))
             device = torch.device("xpu:0")
             self._n_gpu = 1
         elif is_sagemaker_dp_enabled():
...
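
For readers unfamiliar with the added call: `PartialState` comes from the `accelerate` library, and extra keyword arguments such as `timeout` are forwarded to the distributed process-group setup when one is initialized. A hedged usage sketch, assuming `accelerate` is installed; the 1800-second value is illustrative, standing in for `self.ddp_timeout`:

```python
from datetime import timedelta

from accelerate import PartialState

# Mirrors the added line; in the real patch the timeout comes from
# self.ddp_timeout. The timeout only takes effect when a distributed
# backend is actually initialized; on a single process it is unused.
state = PartialState(timeout=timedelta(seconds=1800))
print(state.device)            # e.g. an XPU device on an XPU machine
print(state.distributed_type)  # DistributedType.NO when single-process
```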