Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
39a6a240
Unverified
Commit
39a6a240
authored
Apr 09, 2026
by
Schwinn Saereesitthipitak
Committed by
GitHub
Apr 09, 2026
Browse files
refactor: simplify GPU Memory Service integrations and module boundaries (#7875)
parent
02666f04
Changes
51
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
35 additions
and
22 deletions
+35
-22
tests/gms/common/test_gms_sglang_patches.py
tests/gms/common/test_gms_sglang_patches.py
+10
-3
tests/gms/common/test_gms_torch_allocator.py
tests/gms/common/test_gms_torch_allocator.py
+2
-1
tests/gms/conftest.py
tests/gms/conftest.py
+1
-10
tests/gms/harness/external_weight_writer.py
tests/gms/harness/external_weight_writer.py
+5
-3
tests/gms/harness/runtime.py
tests/gms/harness/runtime.py
+2
-1
tests/gms/integration/__init__.py
tests/gms/integration/__init__.py
+6
-0
tests/gms/integration/test_external_weight_mgr.py
tests/gms/integration/test_external_weight_mgr.py
+2
-1
tests/gms/integration/test_gms_shadow_failover.py
tests/gms/integration/test_gms_shadow_failover.py
+1
-1
tests/gms/integration/test_gms_sleep_wake.py
tests/gms/integration/test_gms_sleep_wake.py
+1
-1
tests/gms/integration/test_gms_torch_integration.py
tests/gms/integration/test_gms_torch_integration.py
+2
-1
tests/report_pytest_markers.py
tests/report_pytest_markers.py
+3
-0
No files found.
tests/gms/common/test_gms_sglang_patches.py
View file @
39a6a240
...
@@ -6,13 +6,16 @@ from __future__ import annotations
...
@@ -6,13 +6,16 @@ from __future__ import annotations
import
sys
import
sys
import
types
import
types
import
gpu_memory_service.integrations.sglang.patches
as
sglang_patches
import
pytest
import
pytest
from
gpu_memory_service.integrations.sglang
import
patches
as
sglang_patches
torch
=
pytest
.
importorskip
(
"torch"
,
reason
=
"torch is required"
)
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
sglang
,
]
]
...
@@ -37,8 +40,7 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
...
@@ -37,8 +40,7 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
)
)
class
FakeImpl
:
class
FakeImpl
:
def
get_imported_weights_bytes
(
self
):
imported_weights_bytes
=
8
<<
30
return
8
<<
30
fake_memory_saver
.
get_gms_memory_saver_impl
=
lambda
:
FakeImpl
()
fake_memory_saver
.
get_gms_memory_saver_impl
=
lambda
:
FakeImpl
()
...
@@ -55,6 +57,11 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
...
@@ -55,6 +57,11 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
fake_memory_saver
,
fake_memory_saver
,
)
)
monkeypatch
.
setattr
(
sglang_patches
,
"get_gms_memory_saver_impl"
,
lambda
:
FakeImpl
(),
)
monkeypatch
.
setattr
(
sglang_patches
,
"_model_runner_patched"
,
False
)
monkeypatch
.
setattr
(
sglang_patches
,
"_model_runner_patched"
,
False
)
monkeypatch
.
delattr
(
FakeModelRunner
,
"_gms_patched"
,
raising
=
False
)
monkeypatch
.
delattr
(
FakeModelRunner
,
"_gms_patched"
,
raising
=
False
)
monkeypatch
.
setattr
(
monkeypatch
.
setattr
(
...
...
tests/gms/common/test_gms_torch_allocator.py
View file @
39a6a240
...
@@ -5,11 +5,12 @@ from __future__ import annotations
...
@@ -5,11 +5,12 @@ from __future__ import annotations
import
pytest
import
pytest
from
gpu_memory_service.client.torch
import
allocator
as
allocator_module
from
gpu_memory_service.client.torch
import
allocator
as
allocator_module
from
gpu_memory_service.common.
type
s
import
GrantedLockType
,
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
GrantedLockType
,
RequestedLockType
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
none
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
gpu_0
,
]
]
...
...
tests/gms/conftest.py
View file @
39a6a240
...
@@ -5,16 +5,7 @@
...
@@ -5,16 +5,7 @@
import
pytest
import
pytest
# Skip collection entirely if gpu_memory_service is not installed.
from
tests.utils.port_utils
import
allocate_port
,
deallocate_ports
# noqa: E402
# This package lives under nested common/ and integration/ subdirectories, so
# we ignore those directories directly instead of only matching test files next
# to this conftest.
try
:
import
gpu_memory_service
# noqa: F401
except
ImportError
:
collect_ignore
=
[
"common"
,
"integration"
]
from
tests.utils.port_utils
import
allocate_port
,
deallocate_ports
@
pytest
.
fixture
@
pytest
.
fixture
...
...
tests/gms/harness/external_weight_writer.py
View file @
39a6a240
...
@@ -10,11 +10,13 @@ import subprocess
...
@@ -10,11 +10,13 @@ import subprocess
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
import
torch
import
torch
from
gpu_memory_service
import
get_or_create_gms_client_memory_manager
from
gpu_memory_service.client.memory_manager
import
GMSClientMemoryManager
from
gpu_memory_service.client.memory_manager
import
GMSClientMemoryManager
from
gpu_memory_service.client.torch.allocator
import
gms_use_mem_pool
from
gpu_memory_service.client.torch.allocator
import
(
get_or_create_gms_client_memory_manager
,
gms_use_mem_pool
,
)
from
gpu_memory_service.client.torch.module
import
register_module_tensors
from
gpu_memory_service.client.torch.module
import
register_module_tensors
from
gpu_memory_service.common.
type
s
import
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
RequestedLockType
from
gpu_memory_service.common.utils
import
get_socket_path
from
gpu_memory_service.common.utils
import
get_socket_path
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
...
...
tests/gms/harness/runtime.py
View file @
39a6a240
...
@@ -7,7 +7,6 @@ import logging
...
@@ -7,7 +7,6 @@ import logging
import
time
import
time
from
pathlib
import
Path
from
pathlib
import
Path
import
pynvml
import
requests
import
requests
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
...
@@ -19,6 +18,8 @@ MIN_EXPECTED_MEMORY_RETURN_FRACTION = 0.6
...
@@ -19,6 +18,8 @@ MIN_EXPECTED_MEMORY_RETURN_FRACTION = 0.6
def
get_gpu_memory_used
(
device
:
int
=
0
)
->
int
:
def
get_gpu_memory_used
(
device
:
int
=
0
)
->
int
:
import
pynvml
pynvml
.
nvmlInit
()
pynvml
.
nvmlInit
()
try
:
try
:
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
device
)
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
device
)
...
...
tests/gms/integration/__init__.py
View file @
39a6a240
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import
pytest
pytest
.
importorskip
(
"gpu_memory_service"
,
reason
=
"gpu_memory_service is required"
)
tests/gms/integration/test_external_weight_mgr.py
View file @
39a6a240
...
@@ -10,7 +10,8 @@ from typing import Callable, Protocol
...
@@ -10,7 +10,8 @@ from typing import Callable, Protocol
import
pytest
import
pytest
from
gpu_memory_service.client.session
import
_GMSClientSession
from
gpu_memory_service.client.session
import
_GMSClientSession
from
gpu_memory_service.common.types
import
RequestedLockType
,
ServerState
from
gpu_memory_service.common.locks
import
RequestedLockType
from
gpu_memory_service.server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
...
...
tests/gms/integration/test_gms_shadow_failover.py
View file @
39a6a240
...
@@ -12,7 +12,7 @@ from contextlib import ExitStack
...
@@ -12,7 +12,7 @@ from contextlib import ExitStack
from
typing
import
Callable
from
typing
import
Callable
import
pytest
import
pytest
from
gpu_memory_service.
common.types
import
ServerState
from
gpu_memory_service.
server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
...
...
tests/gms/integration/test_gms_sleep_wake.py
View file @
39a6a240
...
@@ -9,7 +9,7 @@ from contextlib import ExitStack
...
@@ -9,7 +9,7 @@ from contextlib import ExitStack
from
typing
import
Callable
from
typing
import
Callable
import
pytest
import
pytest
from
gpu_memory_service.
common.types
import
ServerState
from
gpu_memory_service.
server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
...
...
tests/gms/integration/test_gms_torch_integration.py
View file @
39a6a240
...
@@ -12,7 +12,7 @@ from gpu_memory_service.client.torch.module import (
...
@@ -12,7 +12,7 @@ from gpu_memory_service.client.torch.module import (
register_module_tensors
,
register_module_tensors
,
)
)
from
gpu_memory_service.client.torch.tensor
import
_tensor_from_pointer
from
gpu_memory_service.client.torch.tensor
import
_tensor_from_pointer
from
gpu_memory_service.common.
type
s
import
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
RequestedLockType
from
tests.gms.harness.gms
import
GMSServerProcess
from
tests.gms.harness.gms
import
GMSServerProcess
...
@@ -21,6 +21,7 @@ torch = pytest.importorskip("torch", reason="torch is required")
...
@@ -21,6 +21,7 @@ torch = pytest.importorskip("torch", reason="torch is required")
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
none
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
gpu_1
,
]
]
...
...
tests/report_pytest_markers.py
View file @
39a6a240
...
@@ -129,6 +129,7 @@ STUB_MODULES = [
...
@@ -129,6 +129,7 @@ STUB_MODULES = [
"gpu_memory_service.client.torch.module"
,
"gpu_memory_service.client.torch.module"
,
"gpu_memory_service.client.torch.tensor"
,
"gpu_memory_service.client.torch.tensor"
,
"gpu_memory_service.common"
,
"gpu_memory_service.common"
,
"gpu_memory_service.common.locks"
,
"gpu_memory_service.common.cuda_utils"
,
"gpu_memory_service.common.cuda_utils"
,
"gpu_memory_service.common.protocol"
,
"gpu_memory_service.common.protocol"
,
"gpu_memory_service.common.protocol.messages"
,
"gpu_memory_service.common.protocol.messages"
,
...
@@ -141,11 +142,13 @@ STUB_MODULES = [
...
@@ -141,11 +142,13 @@ STUB_MODULES = [
"gpu_memory_service.integrations.common"
,
"gpu_memory_service.integrations.common"
,
"gpu_memory_service.integrations.common.utils"
,
"gpu_memory_service.integrations.common.utils"
,
"gpu_memory_service.integrations.sglang"
,
"gpu_memory_service.integrations.sglang"
,
"gpu_memory_service.integrations.sglang.patches"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.vllm"
,
"gpu_memory_service.integrations.vllm"
,
"gpu_memory_service.integrations.vllm.worker"
,
"gpu_memory_service.integrations.vllm.worker"
,
"gpu_memory_service.server"
,
"gpu_memory_service.server"
,
"gpu_memory_service.server.allocations"
,
"gpu_memory_service.server.allocations"
,
"gpu_memory_service.server.fsm"
,
"gpu_memory_service.server.gms"
,
"gpu_memory_service.server.gms"
,
"gpu_memory_service.server.rpc"
,
"gpu_memory_service.server.rpc"
,
"gpu_memory_service.server.session"
,
"gpu_memory_service.server.session"
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment