Unverified Commit c2dd6c34 authored by Benjamin Lefaudeux's avatar Benjamin Lefaudeux Committed by GitHub
Browse files

[fix] ShardedDDP - cpu testfix - remove Gloo/CPU (#350)

* no idea about the root issue, but it proved to be fairly narrowed (gloo+cpu+python3.8+no cuda installed) so I guess that's out of scope for fairscale
parent dc05dd80
......@@ -31,6 +31,7 @@ import logging
import multiprocessing
import os
import random
import sys
import tempfile
from typing import Any, Callable, Dict, List, Optional, Tuple
......@@ -53,6 +54,10 @@ skip_if_single_gpu = pytest.mark.skipif(
not torch.cuda.is_available() or torch.cuda.device_count() < 2, reason="multiple GPUs required"
)
skip_if_py38 = pytest.mark.skipif(
sys.version_info.major == 3 and sys.version_info.minor == 8, reason="Python3.8 is skipped"
)
_, filename_mpi = tempfile.mkstemp()
......
......@@ -21,7 +21,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
from fairscale.nn.data_parallel import ShardedDataParallel
from fairscale.optim import OSS
from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_single_gpu
from fairscale.utils.testing import GPT2, skip_if_no_cuda, skip_if_py38, skip_if_single_gpu
def run_one_step(rank, world_size, backend, device, temp_file_name):
......@@ -112,16 +112,17 @@ def run_test(backend, device, world_size=2):
mp.spawn(run_one_step, args=(world_size, backend, device, temp_file_name), nprocs=world_size, join=True)
def test_step_on_cpu():
run_test(backend=dist.Backend.GLOO, device=torch.device("cpu"), world_size=4)
@skip_if_no_cuda
@skip_if_single_gpu
def test_step_on_gpu():
def test_step_gpu():
run_test(backend=dist.Backend.NCCL, device=torch.device("cuda"))
@skip_if_py38
def test_step_cpu():
run_test(backend=dist.Backend.GLOO, device=torch.device("cpu"))
def run_ddp_parity(rank, world_size, backend, temp_file_name):
url = "file://" + temp_file_name
dist.init_process_group(init_method=url, backend=backend, rank=rank, world_size=world_size)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment