Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
92ecd308
"components/metrics/src/vscode:/vscode.git/clone" did not exist on "723f2da74bdaee785e3bc50e542c44737473b04d"
Unverified
Commit
92ecd308
authored
Jan 23, 2026
by
Schwinn Saereesitthipitak
Committed by
GitHub
Jan 24, 2026
Browse files
refactor: move GMS to standalone component (#5616)
parent
7fe89c74
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
26 additions
and
61 deletions
+26
-61
components/src/dynamo/gpu_memory_service/__init__.py
components/src/dynamo/gpu_memory_service/__init__.py
+0
-44
lib/gpu_memory_service/__main__.py
lib/gpu_memory_service/__main__.py
+1
-1
lib/gpu_memory_service/cli/__init__.py
lib/gpu_memory_service/cli/__init__.py
+13
-0
lib/gpu_memory_service/cli/args.py
lib/gpu_memory_service/cli/args.py
+2
-2
lib/gpu_memory_service/cli/runner.py
lib/gpu_memory_service/cli/runner.py
+7
-14
lib/gpu_memory_service/pyproject.toml
lib/gpu_memory_service/pyproject.toml
+3
-0
No files found.
components/src/dynamo/gpu_memory_service/__init__.py
deleted
100644 → 0
View file @
7fe89c74
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""GPU Memory Service component for Dynamo.
This module provides the Dynamo component wrapper around the gpu_memory_service package.
The core functionality is in the gpu_memory_service package; this module provides:
- CLI entry point (python -m dynamo.gpu_memory_service)
- Re-exports for backwards compatibility
"""
# Re-export core functionality from gpu_memory_service package
from
gpu_memory_service
import
(
GMSClientMemoryManager
,
StaleMemoryLayoutError
,
get_gms_client_memory_manager
,
get_or_create_gms_client_memory_manager
,
)
# Re-export extensions (built separately)
try
:
from
gpu_memory_service.client.torch.extensions
import
_allocator_ext
except
(
ImportError
,
OSError
):
_allocator_ext
=
None
# Re-export module utilities
from
gpu_memory_service.client.torch.module
import
(
materialize_module_from_gms
,
register_module_tensors
,
)
__all__
=
[
# Core
"GMSClientMemoryManager"
,
"StaleMemoryLayoutError"
,
# GMS client memory manager
"get_or_create_gms_client_memory_manager"
,
"get_gms_client_memory_manager"
,
# Tensor utilities
"register_module_tensors"
,
"materialize_module_from_gms"
,
# Extensions
"_allocator_ext"
,
]
components/src/dynamo
/gpu_memory_service/__main__.py
→
lib
/gpu_memory_service/__main__.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dynamo.
gpu_memory_service.
serv
er
import
main
from
gpu_memory_service.
cli.runn
er
import
main
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
()
main
()
lib/gpu_memory_service/cli/__init__.py
0 → 100644
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""CLI for GPU Memory Service."""
from
gpu_memory_service.cli.args
import
Config
,
parse_args
from
gpu_memory_service.cli.runner
import
main
__all__
=
[
"Config"
,
"parse_args"
,
"main"
,
]
components/src/dynamo
/gpu_memory_service/args.py
→
lib
/gpu_memory_service/
cli/
args.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Argument parsing for GPU Memory Service server
component
."""
"""Argument parsing for GPU Memory Service server."""
import
argparse
import
argparse
import
logging
import
logging
...
@@ -23,7 +23,7 @@ class Config:
...
@@ -23,7 +23,7 @@ class Config:
def
parse_args
()
->
Config
:
def
parse_args
()
->
Config
:
"""Parse command line arguments for GPU Memory Service server."""
"""Parse command line arguments for GPU Memory Service server."""
parser
=
argparse
.
ArgumentParser
(
parser
=
argparse
.
ArgumentParser
(
description
=
"GPU Memory Service allocation server
for Dynamo
."
description
=
"GPU Memory Service allocation server."
)
)
# GPU Memory Service specific arguments
# GPU Memory Service specific arguments
...
...
components/src/dynamo
/gpu_memory_service/
serv
er.py
→
lib
/gpu_memory_service/
cli/runn
er.py
View file @
92ecd308
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""GPU Memory Service allocation server
component for Dynamo
.
"""GPU Memory Service allocation server
runner
.
This component wraps the GMSRPCServer from gpu_memory_service to manage
This module provides the CLI runner for the GPU Memory Service server,
GPU memory allocations with connection-based RW/RO locking.
which manages GPU memory allocations with connection-based RW/RO locking.
Workers connect via the socket path, which should be passed to vLLM/SGLang via:
--load-format gpu_memory_service
--model-loader-extra-config '{"gpu_memory_service_socket_path": "/tmp/gpu_memory_service_{device}.sock"}'
Usage:
Usage:
python -m
dynamo.
gpu_memory_service --device 0
python -m gpu_memory_service --device 0
python -m
dynamo.
gpu_memory_service --device 0 --socket-path /tmp/gpu_memory_service_{device}.sock
python -m gpu_memory_service --device 0 --socket-path /tmp/gpu_memory_service_{device}.sock
"""
"""
import
asyncio
import
asyncio
...
@@ -38,7 +34,7 @@ async def worker() -> None:
...
@@ -38,7 +34,7 @@ async def worker() -> None:
# Configure logging level
# Configure logging level
if
config
.
verbose
:
if
config
.
verbose
:
logging
.
getLogger
().
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
().
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
(
"
dynamo.
gpu_memory_service"
).
setLevel
(
logging
.
DEBUG
)
logging
.
getLogger
(
"gpu_memory_service"
).
setLevel
(
logging
.
DEBUG
)
logger
.
info
(
f
"Starting GPU Memory Service Server for device
{
config
.
device
}
"
)
logger
.
info
(
f
"Starting GPU Memory Service Server for device
{
config
.
device
}
"
)
logger
.
info
(
f
"Socket path:
{
config
.
socket_path
}
"
)
logger
.
info
(
f
"Socket path:
{
config
.
socket_path
}
"
)
...
@@ -59,10 +55,7 @@ async def worker() -> None:
...
@@ -59,10 +55,7 @@ async def worker() -> None:
await
server
.
start
()
await
server
.
start
()
logger
.
info
(
"GPU Memory Service Server ready, waiting for connections..."
)
logger
.
info
(
"GPU Memory Service Server ready, waiting for connections..."
)
logger
.
info
(
logger
.
info
(
f
"Clients can connect via socket:
{
config
.
socket_path
}
"
)
f
"To connect vLLM workers, use: --load-format gpu_memory_service "
f
'--model-loader-extra-config
\'
{{"gpu_memory_service_socket_path": "
{
config
.
socket_path
}
"}}
\'
'
)
# Wait for shutdown signal
# Wait for shutdown signal
try
:
try
:
...
...
lib/gpu_memory_service/pyproject.toml
View file @
92ecd308
...
@@ -34,6 +34,9 @@ classifiers = [
...
@@ -34,6 +34,9 @@ classifiers = [
]
]
keywords
=
[
"llm"
,
"genai"
,
"inference"
,
"nvidia"
,
"gpu"
,
"memory"
,
"dynamo"
]
keywords
=
[
"llm"
,
"genai"
,
"inference"
,
"nvidia"
,
"gpu"
,
"memory"
,
"dynamo"
]
[project.scripts]
gpu-memory-service
=
"gpu_memory_service.cli.runner:main"
[project.optional-dependencies]
[project.optional-dependencies]
test
=
[
test
=
[
"pytest>=8.3.4"
,
"pytest>=8.3.4"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment