import random

import numpy as np
import pytest
import torch
import torchvision


torchvision.disable_beta_transforms_warning()

from common_utils import CUDA_NOT_AVAILABLE_MSG, IN_FBCODE, IN_OSS_CI, IN_RE_WORKER, OSS_CI_GPU_NO_CUDA_MSG


def pytest_configure(config):
    # register an additional marker (see pytest_collection_modifyitems)
    config.addinivalue_line("markers", "needs_cuda: mark for tests that rely on a CUDA device")
    config.addinivalue_line("markers", "dont_collect: mark for tests that should not be collected")


def pytest_collection_modifyitems(items):
    # This hook is called by pytest after it has collected the tests (google its name to check out its doc!)
    # We can ignore some tests as we see fit here, or add marks, such as a skip mark.
    #
    # Typically, here, we try to optimize CI time. In particular, the GPU CI instances don't need to run the
    # tests that don't need CUDA, because those tests are extensively tested in the CPU CI instances already.
    # This is true for both OSS CI and the fbcode internal CI.
    # In the fbcode CI, we have an additional constraint: we try to avoid skipping tests. So instead of relying on
    # pytest.mark.skip, in fbcode we literally just remove those tests from the `items` list, and it's as if
    # these tests never existed.

    out_items = []
    for item in items:
        # The needs_cuda mark will exist if the test was explicitly decorated with
        # the @needs_cuda decorator. It will also exist if it was parametrized with a
        # parameter that has the mark: for example if a test is parametrized with
        # @pytest.mark.parametrize('device', cpu_and_gpu())
        # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark,
        # and the ones with device == 'cpu' won't have the mark.
        needs_cuda = item.get_closest_marker("needs_cuda") is not None

        if needs_cuda and not torch.cuda.is_available():
            # In general, we skip cuda tests on machines without a GPU
            # There are special cases though, see below
            item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG))

        if IN_FBCODE:
            # fbcode doesn't like skipping tests, so instead we just don't collect them,
            # so that they don't even "exist", hence the continue statements.
            if not needs_cuda and IN_RE_WORKER:
                # The RE workers are the machines with a GPU; we don't want them to run CPU-only tests.
                continue
            if needs_cuda and not torch.cuda.is_available():
                # On the test machines without a GPU, we want to ignore the tests that need cuda.
                # TODO: something more robust would be to do that only in a sandcastle instance,
                # so that we can still see the test being skipped when testing locally from a devvm
                continue
        elif IN_OSS_CI:
            # Here we're not in fbcode, so we can safely collect and skip tests.
            if not needs_cuda and torch.cuda.is_available():
                # Similar to what happens in RE workers: we don't need the OSS CI GPU machines
                # to run the CPU-only tests.
                item.add_marker(pytest.mark.skip(reason=OSS_CI_GPU_NO_CUDA_MSG))

        if item.get_closest_marker("dont_collect") is not None:
            # currently, this is only used for some tests we're sure we don't want to run on fbcode
            continue

        out_items.append(item)

    items[:] = out_items
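    # Note: the in-place slice assignment above is deliberate: pytest holds a
    # reference to the `items` list, so rebinding the name (`items = out_items`)
    # would not change what pytest actually runs.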


def pytest_sessionfinish(session, exitstatus):
    # This hook is called after all tests have run, and just before returning an exit status.
    # Here we change exit code 5 into 0.
    #
    # 5 is issued when no tests were actually run, e.g. if you use `pytest -k some_regex_that_is_never_matched`.
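    # (pytest exposes this code as pytest.ExitCode.NO_TESTS_COLLECTED.)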
    #
    # Having no tests run for a given test rule is a common scenario in fbcode; it typically happens on
    # the GPU test machines which don't run the CPU-only tests (see pytest_collection_modifyitems above). For
    # example `test_transforms.py` doesn't contain any CUDA test at the time of
    # writing, so on a GPU test machine, testpilot would invoke pytest on this file and no test would be run.
    # This would result in pytest returning 5, causing testpilot to raise an error.
    # To avoid this, we transform this 5 into a 0 to make testpilot happy.
    if exitstatus == 5:
        session.exitstatus = 0


@pytest.fixture(autouse=True)
def prevent_leaking_rng():
    # Prevent each test from leaking the rng to all other tests when they call
    # torch.manual_seed() or random.seed() or np.random.seed().
    # Note: the numpy rngs should never leak anyway, as we never use
    # np.random.seed() and instead rely on np.random.RandomState instances (see
    # issue #4247). We still do it as an extra precaution.

    torch_rng_state = torch.get_rng_state()
    builtin_rng_state = random.getstate()
    numpy_rng_state = np.random.get_state()
    if torch.cuda.is_available():
        cuda_rng_state = torch.cuda.get_rng_state()

    yield

    torch.set_rng_state(torch_rng_state)
    random.setstate(builtin_rng_state)
    np.random.set_state(numpy_rng_state)
    if torch.cuda.is_available():
        torch.cuda.set_rng_state(cuda_rng_state)
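

# A minimal sketch of the leak the fixture above guards against (hypothetical
# tests, not part of this file): without the fixture, the seed set in test_a
# would silently make test_b deterministic.
#
#     def test_a():
#         torch.manual_seed(0)
#
#     def test_b():
#         torch.rand(1)  # would see test_a's seeded rng state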