Unverified commit 9c875839, authored by statelesshz, committed by GitHub

Add Ascend NPU accelerator support (#24879)

* Add Ascend NPU accelerator support

* fix style warning
parent f14c7f99
@@ -721,6 +721,7 @@ _import_structure = {
         "is_tokenizers_available",
         "is_torch_available",
         "is_torch_neuroncore_available",
+        "is_torch_npu_available",
         "is_torch_tpu_available",
         "is_torchvision_available",
         "is_vision_available",
@@ -4643,6 +4644,7 @@ if TYPE_CHECKING:
         is_tokenizers_available,
         is_torch_available,
         is_torch_neuroncore_available,
+        is_torch_npu_available,
         is_torch_tpu_available,
         is_torchvision_available,
         is_vision_available,
...
@@ -91,6 +91,7 @@ from .utils import (
     is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_neuroncore_available,
+    is_torch_npu_available,
     is_torch_tensorrt_fx_available,
     is_torch_tf32_available,
     is_torch_tpu_available,
@@ -587,6 +588,26 @@ def require_torch_neuroncore(test_case):
     )


+def require_torch_npu(test_case):
+    """
+    Decorator marking a test that requires NPU (in PyTorch).
+    """
+    return unittest.skipUnless(is_torch_npu_available(), "test requires PyTorch NPU")(test_case)
+
+
+def require_torch_multi_npu(test_case):
+    """
+    Decorator marking a test that requires a multi-NPU setup (in PyTorch). These tests are skipped on a machine without
+    multiple NPUs.
+
+    To run *only* the multi_npu tests, assuming all test names contain multi_npu: $ pytest -sv ./tests -k "multi_npu"
+    """
+    if not is_torch_npu_available():
+        return unittest.skip("test requires PyTorch NPU")(test_case)
+
+    return unittest.skipUnless(torch.npu.device_count() > 1, "test requires multiple NPUs")(test_case)
+
+
 if is_torch_available():
     # Set env var CUDA_VISIBLE_DEVICES="" to force cpu-mode
     import torch
...
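As a usage sketch (not part of the commit), this is roughly how the two new decorators might be applied; the test class and method names below are hypothetical:

```python
# Hypothetical test module showing how the new decorators might be used.
# require_torch_npu skips the test unless torch_npu and an NPU are available;
# require_torch_multi_npu additionally requires torch.npu.device_count() > 1.
import unittest

from transformers.testing_utils import require_torch_multi_npu, require_torch_npu


class NpuSmokeTest(unittest.TestCase):
    @require_torch_npu
    def test_tensor_moves_to_npu(self):
        import torch

        x = torch.ones(2, 2).to("npu")  # torch_npu registers the "npu" device type
        self.assertEqual(x.device.type, "npu")

    @require_torch_multi_npu
    def test_multiple_npus_visible(self):
        import torch

        self.assertGreater(torch.npu.device_count(), 1)
```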
@@ -36,6 +36,7 @@ from .utils import (
     is_torch_available,
     is_torch_cuda_available,
     is_torch_mps_available,
+    is_torch_npu_available,
     is_torch_tpu_available,
     requires_backends,
 )
@@ -94,6 +95,8 @@ def set_seed(seed: int):
         torch.manual_seed(seed)
         torch.cuda.manual_seed_all(seed)
         # ^^ safe to call this function even if cuda is not available
+        if is_torch_npu_available():
+            torch.npu.manual_seed_all(seed)
     if is_tf_available():
         tf.random.set_seed(seed)
...
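For context, a short sketch of what the extended `set_seed` provides: a single call now also covers the NPU random number generator, so no backend-specific seeding is needed (illustrative, not from the diff):

```python
# set_seed seeds Python's random, NumPy, and torch; with this change it also
# calls torch.npu.manual_seed_all(seed) when torch_npu is available.
from transformers import set_seed

set_seed(42)
```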
@@ -47,6 +47,7 @@ from .utils import (
     is_torch_bf16_cpu_available,
     is_torch_bf16_gpu_available,
     is_torch_neuroncore_available,
+    is_torch_npu_available,
     is_torch_tf32_available,
     is_torch_tpu_available,
     logging,
@@ -1368,12 +1369,13 @@ class TrainingArguments:
             self.framework == "pt"
             and is_torch_available()
             and (self.device.type != "cuda")
+            and (self.device.type != "npu")
             and (get_xla_device_type(self.device) != "GPU")
             and (self.fp16 or self.fp16_full_eval)
         ):
             raise ValueError(
                 "FP16 Mixed precision training with AMP or APEX (`--fp16`) and FP16 half precision evaluation"
-                " (`--fp16_full_eval`) can only be used on CUDA devices."
+                " (`--fp16_full_eval`) can only be used on CUDA or NPU devices."
             )

         if (
@@ -1769,6 +1771,10 @@ class TrainingArguments:
         elif self.use_cpu:
             device = torch.device("cpu")
             self._n_gpu = 0
+        elif is_torch_npu_available():
+            device = torch.device("npu:0")
+            torch.npu.set_device(device)
+            self._n_gpu = 1
         else:
             # if n_gpu is > 1 we'll use nn.DataParallel.
             # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
...
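A small illustration of the new device-resolution branch, assuming an Ascend host with `torch_npu` installed; the output directory below is a placeholder and other machines fall back to the usual CUDA/CPU paths:

```python
# Illustrative only: on an Ascend host the branch above resolves the training
# device to "npu:0" with no extra flags, and the relaxed check now accepts
# --fp16 on that device; elsewhere this resolves to cuda:0 or cpu as before.
from transformers import TrainingArguments

args = TrainingArguments(output_dir="tmp_trainer_out")  # placeholder path
print(args.device)  # npu:0 on an Ascend host, cuda:0 / cpu otherwise
print(args.n_gpu)   # 1 in the single-NPU branch shown above
```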
@@ -164,6 +164,7 @@ from .import_utils import (
     is_torch_fx_proxy,
     is_torch_mps_available,
     is_torch_neuroncore_available,
+    is_torch_npu_available,
     is_torch_tensorrt_fx_available,
     is_torch_tf32_available,
     is_torch_tpu_available,
...
@@ -397,6 +397,25 @@ def is_torch_neuroncore_available(check_device=True):
     return False


+@lru_cache()
+def is_torch_npu_available(check_device=False):
+    "Checks if `torch_npu` is installed and potentially if a NPU is in the environment"
+    if not _torch_available or importlib.util.find_spec("torch_npu") is None:
+        return False
+
+    import torch
+    import torch_npu  # noqa: F401
+
+    if check_device:
+        try:
+            # Will raise a RuntimeError if no NPU is found
+            _ = torch.npu.device_count()
+            return torch.npu.is_available()
+        except RuntimeError:
+            return False
+    return hasattr(torch, "npu") and torch.npu.is_available()
+
+
 def is_torchdynamo_available():
     if not is_torch_available():
         return False
...
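A quick sketch of how the new availability check can guard device selection in downstream code; the tensor and device names here are purely illustrative:

```python
# is_torch_npu_available() returns True only when torch and torch_npu are
# installed and torch.npu reports an available device, so it is safe to call
# on any machine.
import torch

from transformers.utils import is_torch_npu_available

device = torch.device("npu:0") if is_torch_npu_available() else torch.device("cpu")
x = torch.randn(1, 8).to(device)
print(device, x.device)
```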
@@ -21,6 +21,7 @@ from transformers.testing_utils import (
     get_torch_dist_unique_port,
     require_torch_multi_gpu,
     require_torch_neuroncore,
+    require_torch_npu,
 )
 from transformers.training_args import ParallelMode
 from transformers.utils import logging
@@ -77,6 +78,20 @@ class TestTrainerDistributedNeuronCore(TestCasePlus):
         # successful return here == success - any errors would have caused an error in the sub-call


+class TestTrainerDistributedNPU(TestCasePlus):
+    @require_torch_npu
+    def test_trainer(self):
+        distributed_args = f"""--nproc_per_node=2
+            --master_port={get_torch_dist_unique_port()}
+            {self.test_file_dir}/test_trainer_distributed.py
+        """.split()
+        output_dir = self.get_auto_remove_tmp_dir()
+        args = f"--output_dir {output_dir}".split()
+        cmd = ["torchrun"] + distributed_args + args
+        execute_subprocess_async(cmd, env=self.get_env())
+        # successful return here == success - any errors would have caused an error in the sub-call
+
+
 class TestTrainerDistributed(TestCasePlus):
     @require_torch_multi_gpu
     def test_trainer(self):
...
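Outside the test harness, roughly the same launch can be reproduced by hand; a standalone sketch in which the port, script path, and output directory are assumptions, not values from the diff:

```python
# A 2-process torchrun launch of the distributed trainer test on a 2-NPU host,
# mirroring the command the test above assembles. Paths and port are placeholders.
import subprocess

cmd = [
    "torchrun",
    "--nproc_per_node=2",
    "--master_port=29510",
    "tests/trainer/test_trainer_distributed.py",  # assumed path in a repo checkout
    "--output_dir",
    "/tmp/npu_ddp_test",
]
subprocess.run(cmd, check=True)
```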