Unverified Commit c2dd6c34 authored by Benjamin Lefaudeux, committed by GitHub

[fix] ShardedDDP - cpu testfix - remove Gloo/CPU (#350)

* No idea about the root issue, but it proved to be fairly narrow (Gloo + CPU + Python 3.8 + no CUDA installed), so I guess that's out of scope for fairscale
parent dc05dd80
@@ -31,6 +31,7 @@ import logging
 import multiprocessing
 import os
 import random
+import sys
 import tempfile
 from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -53,6 +54,10 @@ skip_if_single_gpu = pytest.mark.skipif(
     not torch.cuda.is_available() or torch.cuda.device_count() < 2, reason="multiple GPUs required"
 )
+skip_if_py38 = pytest.mark.skipif(
+    sys.version_info.major == 3 and sys.version_info.minor == 8, reason="Python3.8 is skipped"
+)
 
 _, filename_mpi = tempfile.mkstemp()
@@ -21,7 +21,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 from fairscale.nn.data_parallel import ShardedDataParallel
 from fairscale.optim import OSS
-from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_single_gpu
+from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_py38, skip_if_single_gpu
 
 
 def run_one_step(rank, world_size, backend, device, temp_file_name):
@@ -112,16 +112,17 @@ def run_test(backend, device, world_size=2):
     mp.spawn(run_one_step, args=(world_size, backend, device, temp_file_name), nprocs=world_size, join=True)
 
 
-def test_step_on_cpu():
-    run_test(backend=dist.Backend.GLOO, device=torch.device("cpu"), world_size=4)
-
-
 @skip_if_no_cuda
 @skip_if_single_gpu
-def test_step_on_gpu():
+def test_step_gpu():
     run_test(backend=dist.Backend.NCCL, device=torch.device("cuda"))
 
 
+@skip_if_py38
+def test_step_cpu():
+    run_test(backend=dist.Backend.GLOO, device=torch.device("cpu"))
+
+
 def run_ddp_parity(rank, world_size, backend, temp_file_name):
     url = "file://" + temp_file_name
     dist.init_process_group(init_method=url, backend=backend, rank=rank, world_size=world_size)
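For reference, here is a minimal, self-contained sketch (not part of the commit itself) of the skip pattern this change introduces: skip_if_py38 is an ordinary pytest.mark.skipif marker keyed on the interpreter version, so on Python 3.8 the decorated Gloo/CPU test is reported as skipped rather than run. The test body below is a placeholder, not the fairscale test.

# Minimal sketch of the skip pattern added in this commit.
import sys

import pytest

# Same condition as the skip_if_py38 marker added to fairscale.utils.testing.
skip_if_py38 = pytest.mark.skipif(
    sys.version_info.major == 3 and sys.version_info.minor == 8, reason="Python3.8 is skipped"
)


@skip_if_py38
def test_step_cpu():
    # On Python 3.8 pytest reports this test as skipped instead of executing it.
    assert True

Running pytest with "-rs" lists skipped tests together with the skip reason, which makes it easy to confirm the marker fires on the affected Python 3.8 environments.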