test_mock_gpu_alloc.py 2.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Mock GPU allocation test for profiler validation.

Local-only: this test is skipped in CI (GitHub Actions / GitLab CI).
Do NOT mark it as pre_merge, post_merge, nightly, or e2e -- it exists
solely to validate profile_pytest.py's binary search locally.
"""

import logging
import os
import time

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("CI") is not None
    or os.environ.get("GITHUB_ACTIONS") is not None
    or os.environ.get("GITLAB_CI") is not None,
    reason="Mock GPU allocation test is for local profiling only, not CI",
)

torch = pytest.importorskip("torch", reason="torch required for GPU allocation test")

logger = logging.getLogger(__name__)

ALLOC_MIB = 4096  # 4 GiB


# This cannot be pre_merge, post_merge, nightly, or e2e. It's a mock test for local testing.
@pytest.mark.gpu_1
@pytest.mark.timeout(30)
def test_mock_4gb_gpu_alloc():
    """Allocate 4 GiB of GPU VRAM, hold 2s, release. Honors _PROFILE_PYTEST_VRAM_FRAC_OVERRIDE."""
    if not torch.cuda.is_available():
        pytest.skip("CUDA not available")

    device = 0
    total_mib = torch.cuda.get_device_properties(device).total_memory / (1024 * 1024)

    gpu_util = os.environ.get("_PROFILE_PYTEST_VRAM_FRAC_OVERRIDE")
    if gpu_util is not None:
        cap_mib = total_mib * float(gpu_util)
        logger.info(
            "_PROFILE_PYTEST_VRAM_FRAC_OVERRIDE=%.2f -> cap %.0f MiB (%.1f GiB) of %.0f MiB total",
            float(gpu_util),
            cap_mib,
            cap_mib / 1024,
            total_mib,
        )
        if ALLOC_MIB > cap_mib:
            raise RuntimeError(
                f"Requested {ALLOC_MIB} MiB exceeds _PROFILE_PYTEST_VRAM_FRAC_OVERRIDE "
                f"cap of {cap_mib:.0f} MiB ({gpu_util})"
            )

    num_elements = (ALLOC_MIB * 1024 * 1024) // 4
    logger.info(
        "Allocating %d MiB (%.1f GiB) on cuda:%d ...",
        ALLOC_MIB,
        ALLOC_MIB / 1024,
        device,
    )

    tensor = torch.empty(num_elements, dtype=torch.float32, device=f"cuda:{device}")
    logger.info(
        "Allocated. torch reports %.0f MiB in use.",
        torch.cuda.memory_allocated(device) / (1024 * 1024),
    )

    time.sleep(2.0)

    del tensor
    torch.cuda.empty_cache()
    logger.info(
        "Released. torch reports %.0f MiB in use.",
        torch.cuda.memory_allocated(device) / (1024 * 1024),
    )