Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
92ecd308
Unverified
Commit
92ecd308
authored
Jan 23, 2026
by
Schwinn Saereesitthipitak
Committed by
GitHub
Jan 24, 2026
Browse files
refactor: move GMS to standalone component (#5616)
parent
7fe89c74
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
26 additions
and
61 deletions
+26
-61
components/src/dynamo/gpu_memory_service/__init__.py
components/src/dynamo/gpu_memory_service/__init__.py
+0
-44
lib/gpu_memory_service/__main__.py
lib/gpu_memory_service/__main__.py
+1
-1
lib/gpu_memory_service/cli/__init__.py
lib/gpu_memory_service/cli/__init__.py
+13
-0
lib/gpu_memory_service/cli/args.py
lib/gpu_memory_service/cli/args.py
+2
-2
lib/gpu_memory_service/cli/runner.py
lib/gpu_memory_service/cli/runner.py
+7
-14
lib/gpu_memory_service/pyproject.toml
lib/gpu_memory_service/pyproject.toml
+3
-0
No files found.
components/src/dynamo/gpu_memory_service/__init__.py
deleted
100644 → 0
View file @
7fe89c74
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""GPU Memory Service component for Dynamo.
This module provides the Dynamo component wrapper around the gpu_memory_service package.
The core functionality is in the gpu_memory_service package; this module provides:
- CLI entry point (python -m dynamo.gpu_memory_service)
- Re-exports for backwards compatibility
"""
# Re-export core functionality from gpu_memory_service package
from
gpu_memory_service
import
(
GMSClientMemoryManager
,
StaleMemoryLayoutError
,
get_gms_client_memory_manager
,
get_or_create_gms_client_memory_manager
,
)
# Re-export extensions (built separately)
try
:
from
gpu_memory_service.client.torch.extensions
import
_allocator_ext
except
(
ImportError
,
OSError
):
_allocator_ext
=
None
# Re-export module utilities
from
gpu_memory_service.client.torch.module
import
(
materialize_module_from_gms
,
register_module_tensors
,
)
__all__
=
[
# Core
"GMSClientMemoryManager"
,
"StaleMemoryLayoutError"
,
# GMS client memory manager
"get_or_create_gms_client_memory_manager"
,
"get_gms_client_memory_manager"
,
# Tensor utilities
"register_module_tensors"
,
"materialize_module_from_gms"
,
# Extensions
"_allocator_ext"
,
]
components/src/dynamo
/gpu_memory_service/__main__.py
→
lib
/gpu_memory_service/__main__.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dynamo.
gpu_memory_service.
serv
er
import
main
from
gpu_memory_service.
cli.runn
er
import
main
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
()
main
()
lib/gpu_memory_service/cli/__init__.py
0 → 100644
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""CLI for GPU Memory Service."""
from
gpu_memory_service.cli.args
import
Config
,
parse_args
from
gpu_memory_service.cli.runner
import
main
__all__
=
[
"Config"
,
"parse_args"
,
"main"
,
]
components/src/dynamo
/gpu_memory_service/args.py
→
lib
/gpu_memory_service/
cli/
args.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Argument parsing for GPU Memory Service server
component
."""
"""Argument parsing for GPU Memory Service server."""
import
argparse
import
argparse
import
logging
import
logging
...
@@ -23,7 +23,7 @@ class Config:
...
@@ -23,7 +23,7 @@ class Config:
def
parse_args
()
->
Config
:
def
parse_args
()
->
Config
:
"""Parse command line arguments for GPU Memory Service server."""
"""Parse command line arguments for GPU Memory Service server."""
parser
=
argparse
.
ArgumentParser
(
parser
=
argparse
.
ArgumentParser
(
description
=
"GPU Memory Service allocation server
for Dynamo
."
description
=
"GPU Memory Service allocation server."
)
)
# GPU Memory Service specific arguments
# GPU Memory Service specific arguments
...
...
components/src/dynamo
/gpu_memory_service/
serv
er.py
→
lib
/gpu_memory_service/
cli/runn
er.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""GPU Memory Service allocation server
component for Dynamo
.
"""GPU Memory Service allocation server
runner
.
This component wraps the GMSRPCServer from gpu_memory_service to manage
This module provides the CLI runner for the GPU Memory Service server,
GPU memory allocations with connection-based RW/RO locking.
which manages GPU memory allocations with connection-based RW/RO locking.
Workers connect via the socket path, which should be passed to vLLM/SGLang via:
--load-format gpu_memory_service
--model-loader-extra-config '{"gpu_memory_service_socket_path": "/tmp/gpu_memory_service_{device}.sock"}'
Usage:
Usage:
python -m
dynamo.
gpu_memory_service --device 0
python -m gpu_memory_service --device 0
python -m
dynamo.
gpu_memory_service --device 0 --socket-path /tmp/gpu_memory_service_{device}.sock
python -m gpu_memory_service --device 0 --socket-path /tmp/gpu_memory_service_{device}.sock
"""
"""
import
asyncio
import
asyncio
...
@@ -38,7 +34,7 @@ async def worker() -> None:
...
@@ -38,7 +34,7 @@ async def worker() -> None:
# Configure logging level
# Configure logging level
if
config
.
verbose
:
if
config
.
verbose
:
logging
.
getLogger
().
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
().
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
(
"
dynamo.
gpu_memory_service"
).
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
(
"gpu_memory_service"
).
setLevel
(
logging
.
DEBUG
)
logger
.
info
(
f
"Starting GPU Memory Service Server for device
{
config
.
device
}
"
)
logger
.
info
(
f
"Starting GPU Memory Service Server for device
{
config
.
device
}
"
)
logger
.
info
(
f
"Socket path:
{
config
.
socket_path
}
"
)
logger
.
info
(
f
"Socket path:
{
config
.
socket_path
}
"
)
...
@@ -59,10 +55,7 @@ async def worker() -> None:
...
@@ -59,10 +55,7 @@ async def worker() -> None:
await
server
.
start
()
await
server
.
start
()
logger
.
info
(
"GPU Memory Service Server ready, waiting for connections..."
)
logger
.
info
(
"GPU Memory Service Server ready, waiting for connections..."
)
logger
.
info
(
logger
.
info
(
f
"Clients can connect via socket:
{
config
.
socket_path
}
"
)
f
"To connect vLLM workers, use: --load-format gpu_memory_service "
f
'--model-loader-extra-config
\'
{{"gpu_memory_service_socket_path": "
{
config
.
socket_path
}
"}}
\'
'
)
# Wait for shutdown signal
# Wait for shutdown signal
try
:
try
:
...
...
lib/gpu_memory_service/pyproject.toml
View file @
92ecd308
...
@@ -34,6 +34,9 @@ classifiers = [
...
@@ -34,6 +34,9 @@ classifiers = [
]
]
keywords
=
[
"llm"
,
"genai"
,
"inference"
,
"nvidia"
,
"gpu"
,
"memory"
,
"dynamo"
]
keywords
=
[
"llm"
,
"genai"
,
"inference"
,
"nvidia"
,
"gpu"
,
"memory"
,
"dynamo"
]
[project.scripts]
gpu-memory-service
=
"gpu_memory_service.cli.runner:main"
[project.optional-dependencies]
[project.optional-dependencies]
test
=
[
test
=
[
"pytest>=8.3.4"
,
"pytest>=8.3.4"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment