Unverified commit 39a6a240, authored by Schwinn Saereesitthipitak, committed by GitHub
Browse files

refactor: simplify GPU Memory Service integrations and module boundaries (#7875)

parent 02666f04
......@@ -6,13 +6,16 @@ from __future__ import annotations
import sys
import types
import gpu_memory_service.integrations.sglang.patches as sglang_patches
import pytest
from gpu_memory_service.integrations.sglang import patches as sglang_patches
torch = pytest.importorskip("torch", reason="torch is required")
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.gpu_0,
pytest.mark.sglang,
]
......@@ -37,8 +40,7 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
)
class FakeImpl:
def get_imported_weights_bytes(self):
return 8 << 30
imported_weights_bytes = 8 << 30
fake_memory_saver.get_gms_memory_saver_impl = lambda: FakeImpl()
......@@ -55,6 +57,11 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
"gpu_memory_service.integrations.sglang.memory_saver",
fake_memory_saver,
)
monkeypatch.setattr(
sglang_patches,
"get_gms_memory_saver_impl",
lambda: FakeImpl(),
)
monkeypatch.setattr(sglang_patches, "_model_runner_patched", False)
monkeypatch.delattr(FakeModelRunner, "_gms_patched", raising=False)
monkeypatch.setattr(
......
......@@ -5,11 +5,12 @@ from __future__ import annotations
import pytest
from gpu_memory_service.client.torch import allocator as allocator_module
from gpu_memory_service.common.types import GrantedLockType, RequestedLockType
from gpu_memory_service.common.locks import GrantedLockType, RequestedLockType
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.none,
pytest.mark.gpu_0,
]
......
......@@ -5,16 +5,7 @@
import pytest
# Skip collection entirely if gpu_memory_service is not installed.
# This package lives under nested common/ and integration/ subdirectories, so
# we ignore those directories directly instead of only matching test files next
# to this conftest.
try:
import gpu_memory_service # noqa: F401
except ImportError:
collect_ignore = ["common", "integration"]
from tests.utils.port_utils import allocate_port, deallocate_ports
from tests.utils.port_utils import allocate_port, deallocate_ports # noqa: E402
@pytest.fixture
......
......@@ -10,11 +10,13 @@ import subprocess
from contextlib import contextmanager
import torch
from gpu_memory_service import get_or_create_gms_client_memory_manager
from gpu_memory_service.client.memory_manager import GMSClientMemoryManager
from gpu_memory_service.client.torch.allocator import gms_use_mem_pool
from gpu_memory_service.client.torch.allocator import (
get_or_create_gms_client_memory_manager,
gms_use_mem_pool,
)
from gpu_memory_service.client.torch.module import register_module_tensors
from gpu_memory_service.common.types import RequestedLockType
from gpu_memory_service.common.locks import RequestedLockType
from gpu_memory_service.common.utils import get_socket_path
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
......
......@@ -7,7 +7,6 @@ import logging
import time
from pathlib import Path
import pynvml
import requests
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
......@@ -19,6 +18,8 @@ MIN_EXPECTED_MEMORY_RETURN_FRACTION = 0.6
def get_gpu_memory_used(device: int = 0) -> int:
import pynvml
pynvml.nvmlInit()
try:
handle = pynvml.nvmlDeviceGetHandleByIndex(device)
......
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import pytest
pytest.importorskip("gpu_memory_service", reason="gpu_memory_service is required")
......@@ -10,7 +10,8 @@ from typing import Callable, Protocol
import pytest
from gpu_memory_service.client.session import _GMSClientSession
from gpu_memory_service.common.types import RequestedLockType, ServerState
from gpu_memory_service.common.locks import RequestedLockType
from gpu_memory_service.server.fsm import ServerState
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
from tests.utils.managed_process import DynamoFrontendProcess
......
......@@ -12,7 +12,7 @@ from contextlib import ExitStack
from typing import Callable
import pytest
from gpu_memory_service.common.types import ServerState
from gpu_memory_service.server.fsm import ServerState
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess
......
......@@ -9,7 +9,7 @@ from contextlib import ExitStack
from typing import Callable
import pytest
from gpu_memory_service.common.types import ServerState
from gpu_memory_service.server.fsm import ServerState
from tests.utils.constants import FAULT_TOLERANCE_MODEL_NAME
from tests.utils.managed_process import DynamoFrontendProcess, ManagedProcess
......
......@@ -12,7 +12,7 @@ from gpu_memory_service.client.torch.module import (
register_module_tensors,
)
from gpu_memory_service.client.torch.tensor import _tensor_from_pointer
from gpu_memory_service.common.types import RequestedLockType
from gpu_memory_service.common.locks import RequestedLockType
from tests.gms.harness.gms import GMSServerProcess
......@@ -21,6 +21,7 @@ torch = pytest.importorskip("torch", reason="torch is required")
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.unit,
pytest.mark.none,
pytest.mark.gpu_1,
]
......
......@@ -129,6 +129,7 @@ STUB_MODULES = [
"gpu_memory_service.client.torch.module",
"gpu_memory_service.client.torch.tensor",
"gpu_memory_service.common",
"gpu_memory_service.common.locks",
"gpu_memory_service.common.cuda_utils",
"gpu_memory_service.common.protocol",
"gpu_memory_service.common.protocol.messages",
......@@ -141,11 +142,13 @@ STUB_MODULES = [
"gpu_memory_service.integrations.common",
"gpu_memory_service.integrations.common.utils",
"gpu_memory_service.integrations.sglang",
"gpu_memory_service.integrations.sglang.patches",
"gpu_memory_service.integrations.sglang.memory_saver",
"gpu_memory_service.integrations.vllm",
"gpu_memory_service.integrations.vllm.worker",
"gpu_memory_service.server",
"gpu_memory_service.server.allocations",
"gpu_memory_service.server.fsm",
"gpu_memory_service.server.gms",
"gpu_memory_service.server.rpc",
"gpu_memory_service.server.session",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment