Unverified commit 2350968e, authored by Crutcher Dunnavant, committed by GitHub

Move f/utils => f/internal; move testing libs to fair_dev/testing (#1004)

parent 3b727945
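
Every hunk below applies the same mechanical substitution: utility code moves from fairscale.utils to fairscale.internal, and test-only helpers move out of the fairscale package into the top-level fair_dev/testing directory. As a minimal sketch (not part of the commit itself) of how a downstream test module is updated, using only helper names that appear in this diff; the particular combination of imports is illustrative:

    # Before this commit: both kinds of helpers lived under fairscale.utils.
    # from fairscale.utils import torch_version
    # from fairscale.utils.testing import dist_init, skip_if_single_gpu
    # from fairscale.utils.golden_testing_data import adascale_test_data

    # After this commit: testing helpers come from the top-level fair_dev package,
    # while internal utilities stay in the library under fairscale.internal.
    from fair_dev.testing.golden_testing_data import adascale_test_data
    from fair_dev.testing.testing import dist_init, skip_if_single_gpu
    from fairscale.internal import torch_version

In the hunks shown here, only the import paths change; the call sites are untouched.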
@@ -24,9 +24,9 @@ import torch
 import torch.nn.init as init
 from torch.nn.parameter import Parameter
+from fair_dev.testing.testing import dist_init, set_random_seed, spawn_for_all_world_sizes
 from fairscale.nn.model_parallel import initialize as mpu
 from fairscale.nn.model_parallel import layers
-from fairscale.utils.testing import dist_init, set_random_seed, spawn_for_all_world_sizes

 def run_test_parallel_embedding(rank, model_parallel_size, filename, filename_rpc):
...
@@ -21,10 +21,10 @@
 import torch
+from fair_dev.testing.testing import dist_init, spawn_for_all_world_sizes
 from fairscale.nn.model_parallel import initialize as mpu
 from fairscale.nn.model_parallel import random
 from fairscale.nn.model_parallel.random import get_cuda_rng_tracker, model_parallel_cuda_manual_seed
-from fairscale.utils.testing import dist_init, spawn_for_all_world_sizes

 def run_test_set_cuda_rng_state(rank, model_parallel_size, filename, filename_rpc):
...
@@ -11,9 +11,9 @@ import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
+from fair_dev.testing.testing import make_cudnn_deterministic
+from fairscale.internal import torch_version
 from fairscale.nn import MOELayer, Top2Gate
-from fairscale.utils import torch_version
-from fairscale.utils.testing import make_cudnn_deterministic

 pytestmark = pytest.mark.skipif(
     not (torch.cuda.is_available() and torch_version() >= (1, 8, 0)), reason="cuda and torch>=1.8.0 required"
...
@@ -21,10 +21,10 @@ import pytest
 import torch
 from torch import nn
+from fair_dev.testing.testing import skip_if_single_gpu
 from fairscale.nn.pipe import Pipe
 from fairscale.nn.pipe.skip import pop, skippable, stash
 from fairscale.nn.pipe.skip.portal import PortalBlue, PortalCopy, PortalOrange
-from fairscale.utils.testing import skip_if_single_gpu

 @skip_if_single_gpu
...
@@ -22,8 +22,8 @@ import torch
 from torch import nn
 import torch.nn.functional as F
+from fair_dev.testing.testing import skip_if_single_gpu
 from fairscale.nn.pipe import Pipe
-from fairscale.utils.testing import skip_if_single_gpu

 def test_python_autograd_function():
...
@@ -20,9 +20,9 @@ from torch.nn import Linear, Sequential
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.utils.checkpoint import checkpoint as torch_checkpoint
+from fair_dev.testing.testing import skip_if_no_cuda, skip_if_single_gpu
 from fairscale.nn.pipe.checkpoint import Checkpointing, Function, TensorOrTensors
 from fairscale.nn.pipe.microbatch import Batch
-from fairscale.utils.testing import skip_if_no_cuda, skip_if_single_gpu

 # This test is mainly for checking pytorch & checkpointing behavior. pipe's checkpointing
 # code is tested already in another file. Therefore, we can run this test less frequently.
...
@@ -14,8 +14,8 @@ import numpy as np
 import pytest
 import torch
+from fair_dev.testing.testing import skip_if_single_gpu
 from fairscale.nn import Pipe
-from fairscale.utils.testing import skip_if_single_gpu

 def _get_model(num_inputs=2, num_hidden=20, num_outputs=2):
...
@@ -22,8 +22,8 @@ import torch
 from torch import nn
 import torch.nn.functional as F
+from fair_dev.testing.testing import get_worker_map, torch_spawn
 from fairscale.nn.pipe import AsyncPipe
-from fairscale.utils.testing import get_worker_map, torch_spawn

 @torch_spawn([2])
...
@@ -21,8 +21,8 @@ import pytest
 import torch
 from torch import nn
+from fair_dev.testing.testing import get_worker_map, torch_spawn
 from fairscale.nn.pipe import AsyncPipe
-from fairscale.utils.testing import get_worker_map, torch_spawn

 @torch_spawn([2])
...
@@ -26,11 +26,11 @@ import pytest
 import torch
 from torch import nn
+from fair_dev.testing.testing import get_worker_map, torch_spawn
+from fairscale.internal import torch_version
 from fairscale.nn.model_parallel.initialize import get_pipeline_parallel_group
 from fairscale.nn.pipe import AsyncPipe
 from fairscale.nn.pipe.types import LazyModule
-from fairscale.utils import torch_version
-from fairscale.utils.testing import get_worker_map, torch_spawn

 @torch_spawn([2])
...
@@ -6,10 +6,10 @@ import torch
 from torch import nn
 from torch.distributed import rpc
+from fair_dev.testing.testing import get_worker_map, torch_spawn
+from fairscale.internal import torch_version
 from fairscale.nn.model_parallel.initialize import get_pipeline_parallel_group
 from fairscale.nn.pipe import PipeRPCWrapper
-from fairscale.utils import torch_version
-from fairscale.utils.testing import get_worker_map, torch_spawn

 def init_rpc():
...
@@ -21,8 +21,8 @@ import pytest
 import torch
 from torch import nn
+from fair_dev.testing.testing import get_worker_map, set_random_seed, torch_spawn
 from fairscale.nn.pipe import AsyncPipe
-from fairscale.utils.testing import get_worker_map, set_random_seed, torch_spawn

 @torch_spawn([2])
...
@@ -12,9 +12,9 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from fair_dev.testing.testing import DummyProcessGroup
 from fairscale.nn import FullyShardedDataParallel as FSDP
 from fairscale.nn import auto_wrap, default_auto_wrap_policy, enable_wrap, wrap
-from fairscale.utils.testing import DummyProcessGroup

 try:
     from torch.cuda.amp import autocast
...
@@ -33,11 +33,11 @@ from torch.nn import Linear
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import SGD
+from fair_dev.testing.golden_testing_data import adascale_test_data
+from fair_dev.testing.testing import skip_if_single_gpu
 from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP
 from fairscale.nn.data_parallel import ShardedDataParallel as SDP
 from fairscale.optim import OSS, AdaScale
-from fairscale.utils.golden_testing_data import adascale_test_data
-from fairscale.utils.testing import skip_if_single_gpu

 def _dist_init(rank, world_size, tempfile_name, backend):
...
@@ -18,8 +18,8 @@ import torchvision
 import torchvision.transforms as transforms
 from fair_dev.common_paths import DATASET_CACHE_ROOT
+from fair_dev.testing.testing import skip_a_test_if_in_CI
 from fairscale.optim.layerwise_gradient_scaler import LayerwiseGradientScaler
-from fairscale.utils.testing import skip_a_test_if_in_CI

 # Test: feed forward network
...
@@ -21,15 +21,15 @@ import torch.distributed as dist
 import torch.multiprocessing as mp
 from torch.nn.parallel import DistributedDataParallel as DDP

-import fairscale.optim as optim
-from fairscale.utils import torch_version
-from fairscale.utils.testing import (
+from fair_dev.testing.testing import (
     check_same_model_params,
     check_same_models_across_ranks,
     skip_if_no_cuda,
     skip_if_py39_no_cuda,
     skip_if_single_gpu,
 )
+from fairscale.internal import torch_version
+import fairscale.optim as optim

 BACKEND = dist.Backend.NCCL if torch.cuda.is_available() else dist.Backend.GLOO  # type: ignore
 DEVICE = "cuda" if torch.cuda.is_available() else torch.device("cpu")
...
@@ -22,9 +22,9 @@ from torch.nn import Linear, Sequential
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import SGD
+from fair_dev.testing.golden_testing_data import adascale_test_data
+from fair_dev.testing.testing import skip_if_single_gpu
 from fairscale.optim import OSS, AdaScale, AdaScaleWrapper
-from fairscale.utils.golden_testing_data import adascale_test_data
-from fairscale.utils.testing import skip_if_single_gpu

 def _dist_init(rank, world_size, tempfile_name, backend):
...
@@ -19,10 +19,10 @@ from torch.nn import Linear, Sequential
 from torch.optim import SGD
 from torch.optim.lr_scheduler import LambdaLR
+from fair_dev.testing.golden_testing_data import adascale_test_data
+from fair_dev.testing.testing import make_cudnn_deterministic, skip_if_no_cuda
+from fairscale.fair_dev.testing.testing_memory import find_tensor_by_shape
 from fairscale.optim import AdaScale
-from fairscale.utils.golden_testing_data import adascale_test_data
-from fairscale.utils.testing import make_cudnn_deterministic, skip_if_no_cuda
-from fairscale.utils.testing_memory import find_tensor_by_shape

 def test_basic_cpu():
...
@@ -16,7 +16,7 @@ import pytest
 import torch
 import torch.nn as nn
-from fairscale.utils.containers import (
+from fairscale.internal.containers import (
     apply_to_tensors,
     pack_kwargs,
     split_non_tensors,
...
@@ -12,7 +12,7 @@
 from parameterized import parameterized
 import torch
-from fairscale.utils.parallel import chunk_and_pad
+from fairscale.internal.parallel import chunk_and_pad

 @parameterized.expand([[num_chunks] for num_chunks in range(1, 33)])
...