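"""Unit tests for ColoTensor: construction, indexing, torch function dispatch,
operators, and distributed (sharded / replicated) initialization."""
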
import torch
import pytest
from numpy import allclose
from functools import partial

import torch.multiprocessing as mp

import colossalai
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.tensor import distspec, TensorSpec, ColoTensor
from colossalai.testing import rerun_if_address_is_in_use
from colossalai.utils import free_port


def test_tensor_indexing():
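    # Indexing a ColoTensor should return the same values as indexing the wrapped torch.Tensor.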
    torch_t = torch.randn(2, 3)
    colo_t = ColoTensor(torch_t)
    assert allclose(torch_t[:, 1], colo_t[:, 1])


# FIXME(ver217): support lazy init
@pytest.mark.skip(reason="lazy init is not supported yet")
def test_lazy_init_tensor():
    lazy_t = ColoTensor(2, 3, dtype=torch.float32, requires_grad=True)
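    # The payload should stay unmaterialized at construction; numel() still reports the logical size.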
    assert lazy_t._torch_tensor.numel() == 0
    assert lazy_t.numel() == 6 == lazy_t.torch_tensor().numel()


def test_wrapped_tensor_func():
    t_ref = torch.randn(4, 5)
    t = ColoTensor.from_torch_tensor(t_ref.clone())

    # non-func attr
    assert t.is_cuda == t_ref.is_cuda

    # return 1 torch.Tensor
    t_abs = t.abs()
    assert isinstance(t_abs, ColoTensor) and torch.equal(t_abs, t_ref.abs())

    # return 1 non-torch.Tensor
    assert t.dim() == t_ref.dim()

    # return >1 torch.Tensor
    t_split1, t_split2 = t.split(2)
    assert isinstance(t_split1, ColoTensor) and isinstance(t_split2, ColoTensor)


def test_operand():
    t_ref = torch.randn(4, 5)
    t = ColoTensor.from_torch_tensor(t_ref.clone())

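    # Element-wise operators on ColoTensors should match the result on plain torch tensors.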
    t_ref_res = t_ref + t_ref
    t_res = t + t
    assert torch.allclose(t_ref_res, t_res)


#### Test distributed initialization of a ColoTensor


def _run_view(world_size):
    t_ref = torch.randn(4, 5)
    t = ColoTensor.from_torch_tensor(
        t_ref,
        TensorSpec(distspec.shard(process_group=gpc.get_group(ParallelMode.DATA), dims=[0],
                                  num_partitions=[world_size])))

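    # size_global() should report the logical (unsharded) shape across the process group.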
    assert t.size_global()[0] == 4 * world_size
    assert t.size_global(1) == 5
    assert t.size_global() == torch.Size([4 * world_size, 5])

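    # view_local() reshapes only the local shard; the dist spec stays sharded ('s').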
    t.view_local(4 * 5)
    assert t.tensor_spec.dist_spec.placement.value == 's'

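    # view_global() reshapes the logical tensor and gathers it to a replicated ('r') layout.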
    t = t.view_global(4 * 5 * world_size)
    assert t.tensor_spec.dist_spec.placement.value == 'r'
    assert t.shape == torch.Size([4 * 5 * world_size])


def _run_tensor_shard_init(world_size):
    t_ref = torch.randn(4, 5)
    shard_spec = distspec.shard(process_group=gpc.get_group(ParallelMode.DATA), dims=[0], num_partitions=[world_size])
    tensor_spec = TensorSpec(shard_spec)
    t = ColoTensor.from_torch_tensor(t_ref.clone(), tensor_spec)
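    # Switching the spec from sharded to replicated should gather the shards,
    # so every rank ends up with the full global shape.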
    t.set_tensor_spec(TensorSpec(dist_spec=distspec.replicate()))
    assert t.shape == torch.Size((4 * world_size, 5))


def _run_tensor_replicated_init(world_size):
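    # A tensor created without an explicit spec is treated as replicated and keeps its shape.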
    t_ref = torch.randn(4 * world_size, 5)
    t = ColoTensor.from_torch_tensor(t_ref.clone())

    assert t.shape == torch.Size((4 * world_size, 5)), f"{t.shape}"


def run_dist_tests(rank, world_size, port):
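    # Per-process entry point: initialize the distributed environment, then run each case.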
    colossalai.launch(config={}, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
    _run_tensor_shard_init(world_size)
    _run_tensor_replicated_init(world_size)
    _run_view(world_size)


@pytest.mark.dist
@pytest.mark.parametrize('world_size', [1, 2])
@rerun_if_address_is_in_use()
def test_dist_cases(world_size):
    run_func = partial(run_dist_tests, world_size=world_size, port=free_port())
    mp.spawn(run_func, nprocs=world_size)


if __name__ == '__main__':
    test_dist_cases(2)