Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
39a6a240
"lib/runtime/src/vscode:/vscode.git/clone" did not exist on "651569ffa801e9c495a5cd71bb5b59cbc9575b0b"
Unverified
Commit
39a6a240
authored
Apr 09, 2026
by
Schwinn Saereesitthipitak
Committed by
GitHub
Apr 09, 2026
Browse files
refactor: simplify GPU Memory Service integrations and module boundaries (#7875)
parent
02666f04
Changes
51
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
35 additions
and
22 deletions
+35
-22
tests/gms/common/test_gms_sglang_patches.py
tests/gms/common/test_gms_sglang_patches.py
+10
-3
tests/gms/common/test_gms_torch_allocator.py
tests/gms/common/test_gms_torch_allocator.py
+2
-1
tests/gms/conftest.py
tests/gms/conftest.py
+1
-10
tests/gms/harness/external_weight_writer.py
tests/gms/harness/external_weight_writer.py
+5
-3
tests/gms/harness/runtime.py
tests/gms/harness/runtime.py
+2
-1
tests/gms/integration/__init__.py
tests/gms/integration/__init__.py
+6
-0
tests/gms/integration/test_external_weight_mgr.py
tests/gms/integration/test_external_weight_mgr.py
+2
-1
tests/gms/integration/test_gms_shadow_failover.py
tests/gms/integration/test_gms_shadow_failover.py
+1
-1
tests/gms/integration/test_gms_sleep_wake.py
tests/gms/integration/test_gms_sleep_wake.py
+1
-1
tests/gms/integration/test_gms_torch_integration.py
tests/gms/integration/test_gms_torch_integration.py
+2
-1
tests/report_pytest_markers.py
tests/report_pytest_markers.py
+3
-0
No files found.
tests/gms/common/test_gms_sglang_patches.py
View file @
39a6a240
...
@@ -6,13 +6,16 @@ from __future__ import annotations
...
@@ -6,13 +6,16 @@ from __future__ import annotations
import
sys
import
sys
import
types
import
types
import
gpu_memory_service.integrations.sglang.patches
as
sglang_patches
import
pytest
import
pytest
from
gpu_memory_service.integrations.sglang
import
patches
as
sglang_patches
torch
=
pytest
.
importorskip
(
"torch"
,
reason
=
"torch is required"
)
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
sglang
,
]
]
...
@@ -37,8 +40,7 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
...
@@ -37,8 +40,7 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
)
)
class
FakeImpl
:
class
FakeImpl
:
def
get_imported_weights_bytes
(
self
):
imported_weights_bytes
=
8
<<
30
return
8
<<
30
fake_memory_saver
.
get_gms_memory_saver_impl
=
lambda
:
FakeImpl
()
fake_memory_saver
.
get_gms_memory_saver_impl
=
lambda
:
FakeImpl
()
...
@@ -55,6 +57,11 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
...
@@ -55,6 +57,11 @@ def test_patch_model_runner_rewrites_total_gpu_memory(monkeypatch):
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
fake_memory_saver
,
fake_memory_saver
,
)
)
monkeypatch
.
setattr
(
sglang_patches
,
"get_gms_memory_saver_impl"
,
lambda
:
FakeImpl
(),
)
monkeypatch
.
setattr
(
sglang_patches
,
"_model_runner_patched"
,
False
)
monkeypatch
.
setattr
(
sglang_patches
,
"_model_runner_patched"
,
False
)
monkeypatch
.
delattr
(
FakeModelRunner
,
"_gms_patched"
,
raising
=
False
)
monkeypatch
.
delattr
(
FakeModelRunner
,
"_gms_patched"
,
raising
=
False
)
monkeypatch
.
setattr
(
monkeypatch
.
setattr
(
...
...
tests/gms/common/test_gms_torch_allocator.py
View file @
39a6a240
...
@@ -5,11 +5,12 @@ from __future__ import annotations
...
@@ -5,11 +5,12 @@ from __future__ import annotations
import
pytest
import
pytest
from
gpu_memory_service.client.torch
import
allocator
as
allocator_module
from
gpu_memory_service.client.torch
import
allocator
as
allocator_module
from
gpu_memory_service.common.
type
s
import
GrantedLockType
,
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
GrantedLockType
,
RequestedLockType
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
none
,
pytest
.
mark
.
gpu_0
,
pytest
.
mark
.
gpu_0
,
]
]
...
...
tests/gms/conftest.py
View file @
39a6a240
...
@@ -5,16 +5,7 @@
...
@@ -5,16 +5,7 @@
import
pytest
import
pytest
# Skip collection entirely if gpu_memory_service is not installed.
from
tests.utils.port_utils
import
allocate_port
,
deallocate_ports
# noqa: E402
# This package lives under nested common/ and integration/ subdirectories, so
# we ignore those directories directly instead of only matching test files next
# to this conftest.
try
:
import
gpu_memory_service
# noqa: F401
except
ImportError
:
collect_ignore
=
[
"common"
,
"integration"
]
from
tests.utils.port_utils
import
allocate_port
,
deallocate_ports
@
pytest
.
fixture
@
pytest
.
fixture
...
...
tests/gms/harness/external_weight_writer.py
View file @
39a6a240
...
@@ -10,11 +10,13 @@ import subprocess
...
@@ -10,11 +10,13 @@ import subprocess
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
import
torch
import
torch
from
gpu_memory_service
import
get_or_create_gms_client_memory_manager
from
gpu_memory_service.client.memory_manager
import
GMSClientMemoryManager
from
gpu_memory_service.client.memory_manager
import
GMSClientMemoryManager
from
gpu_memory_service.client.torch.allocator
import
gms_use_mem_pool
from
gpu_memory_service.client.torch.allocator
import
(
get_or_create_gms_client_memory_manager
,
gms_use_mem_pool
,
)
from
gpu_memory_service.client.torch.module
import
register_module_tensors
from
gpu_memory_service.client.torch.module
import
register_module_tensors
from
gpu_memory_service.common.
type
s
import
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
RequestedLockType
from
gpu_memory_service.common.utils
import
get_socket_path
from
gpu_memory_service.common.utils
import
get_socket_path
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
...
...
tests/gms/harness/runtime.py
View file @
39a6a240
...
@@ -7,7 +7,6 @@ import logging
...
@@ -7,7 +7,6 @@ import logging
import
time
import
time
from
pathlib
import
Path
from
pathlib
import
Path
import
pynvml
import
requests
import
requests
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
...
@@ -19,6 +18,8 @@ MIN_EXPECTED_MEMORY_RETURN_FRACTION = 0.6
...
@@ -19,6 +18,8 @@ MIN_EXPECTED_MEMORY_RETURN_FRACTION = 0.6
def
get_gpu_memory_used
(
device
:
int
=
0
)
->
int
:
def
get_gpu_memory_used
(
device
:
int
=
0
)
->
int
:
import
pynvml
pynvml
.
nvmlInit
()
pynvml
.
nvmlInit
()
try
:
try
:
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
device
)
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
device
)
...
...
tests/gms/integration/__init__.py
View file @
39a6a240
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import
pytest
pytest
.
importorskip
(
"gpu_memory_service"
,
reason
=
"gpu_memory_service is required"
)
tests/gms/integration/test_external_weight_mgr.py
View file @
39a6a240
...
@@ -10,7 +10,8 @@ from typing import Callable, Protocol
...
@@ -10,7 +10,8 @@ from typing import Callable, Protocol
import
pytest
import
pytest
from
gpu_memory_service.client.session
import
_GMSClientSession
from
gpu_memory_service.client.session
import
_GMSClientSession
from
gpu_memory_service.common.types
import
RequestedLockType
,
ServerState
from
gpu_memory_service.common.locks
import
RequestedLockType
from
gpu_memory_service.server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
...
...
tests/gms/integration/test_gms_shadow_failover.py
View file @
39a6a240
...
@@ -12,7 +12,7 @@ from contextlib import ExitStack
...
@@ -12,7 +12,7 @@ from contextlib import ExitStack
from
typing
import
Callable
from
typing
import
Callable
import
pytest
import
pytest
from
gpu_memory_service.
common.types
import
ServerState
from
gpu_memory_service.
server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
...
...
tests/gms/integration/test_gms_sleep_wake.py
View file @
39a6a240
...
@@ -9,7 +9,7 @@ from contextlib import ExitStack
...
@@ -9,7 +9,7 @@ from contextlib import ExitStack
from
typing
import
Callable
from
typing
import
Callable
import
pytest
import
pytest
from
gpu_memory_service.
common.types
import
ServerState
from
gpu_memory_service.
server.fsm
import
ServerState
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.constants
import
FAULT_TOLERANCE_MODEL_NAME
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
from
tests.utils.managed_process
import
DynamoFrontendProcess
,
ManagedProcess
...
...
tests/gms/integration/test_gms_torch_integration.py
View file @
39a6a240
...
@@ -12,7 +12,7 @@ from gpu_memory_service.client.torch.module import (
...
@@ -12,7 +12,7 @@ from gpu_memory_service.client.torch.module import (
register_module_tensors
,
register_module_tensors
,
)
)
from
gpu_memory_service.client.torch.tensor
import
_tensor_from_pointer
from
gpu_memory_service.client.torch.tensor
import
_tensor_from_pointer
from
gpu_memory_service.common.
type
s
import
RequestedLockType
from
gpu_memory_service.common.
lock
s
import
RequestedLockType
from
tests.gms.harness.gms
import
GMSServerProcess
from
tests.gms.harness.gms
import
GMSServerProcess
...
@@ -21,6 +21,7 @@ torch = pytest.importorskip("torch", reason="torch is required")
...
@@ -21,6 +21,7 @@ torch = pytest.importorskip("torch", reason="torch is required")
pytestmark
=
[
pytestmark
=
[
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
pre_merge
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
unit
,
pytest
.
mark
.
none
,
pytest
.
mark
.
gpu_1
,
pytest
.
mark
.
gpu_1
,
]
]
...
...
tests/report_pytest_markers.py
View file @
39a6a240
...
@@ -129,6 +129,7 @@ STUB_MODULES = [
...
@@ -129,6 +129,7 @@ STUB_MODULES = [
"gpu_memory_service.client.torch.module"
,
"gpu_memory_service.client.torch.module"
,
"gpu_memory_service.client.torch.tensor"
,
"gpu_memory_service.client.torch.tensor"
,
"gpu_memory_service.common"
,
"gpu_memory_service.common"
,
"gpu_memory_service.common.locks"
,
"gpu_memory_service.common.cuda_utils"
,
"gpu_memory_service.common.cuda_utils"
,
"gpu_memory_service.common.protocol"
,
"gpu_memory_service.common.protocol"
,
"gpu_memory_service.common.protocol.messages"
,
"gpu_memory_service.common.protocol.messages"
,
...
@@ -141,11 +142,13 @@ STUB_MODULES = [
...
@@ -141,11 +142,13 @@ STUB_MODULES = [
"gpu_memory_service.integrations.common"
,
"gpu_memory_service.integrations.common"
,
"gpu_memory_service.integrations.common.utils"
,
"gpu_memory_service.integrations.common.utils"
,
"gpu_memory_service.integrations.sglang"
,
"gpu_memory_service.integrations.sglang"
,
"gpu_memory_service.integrations.sglang.patches"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.sglang.memory_saver"
,
"gpu_memory_service.integrations.vllm"
,
"gpu_memory_service.integrations.vllm"
,
"gpu_memory_service.integrations.vllm.worker"
,
"gpu_memory_service.integrations.vllm.worker"
,
"gpu_memory_service.server"
,
"gpu_memory_service.server"
,
"gpu_memory_service.server.allocations"
,
"gpu_memory_service.server.allocations"
,
"gpu_memory_service.server.fsm"
,
"gpu_memory_service.server.gms"
,
"gpu_memory_service.server.gms"
,
"gpu_memory_service.server.rpc"
,
"gpu_memory_service.server.rpc"
,
"gpu_memory_service.server.session"
,
"gpu_memory_service.server.session"
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment