refactor: use tempfile module instead of hardcoded /tmp paths (#5789)

Signed-off-by: Dan Gil <dagil@nvidia.com>
Co-authored-by: Cursor <cursoragent@cursor.com>

refactor: use tempfile module instead of hardcoded /tmp paths (#5789)
Signed-off-by: Dan Gil <dagil@nvidia.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
0ef41ffe · dagil-nvidia · GitHub · 3c9ca3fc · 0ef41ffe · 0ef41ffe
Unverified Commit 0ef41ffe authored Feb 09, 2026 by dagil-nvidia Committed by GitHub Feb 09, 2026
3 changed files
--- a/components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
+++ b/components/src/dynamo/vllm/multimodal_handlers/encode_worker_handler.py
@@ -5,6 +5,7 @@ import asyncio
 import logging
 import os
 import shutil
+import tempfile
 import time
 from dataclasses import dataclass
 from typing import AsyncGenerator, AsyncIterator
@@ -86,6 +87,10 @@ class EncodeWorkerHandler:
        self.readables = []
        self.embedding_cache = EmbeddingCache()
+        # Use system temp directory for encoder cache files
+        self._cache_dir = os.path.join(tempfile.gettempdir(), "encoder_cache")
+        os.makedirs(self._cache_dir, exist_ok=True)
    def cleanup(self):
        pass
@@ -240,16 +245,12 @@ class EncodeWorkerHandler:
                        f"ENCODER: saving local safetensors file with key {embedding_item.key}, {embedding_item.embeddings_cpu.numel()} * {embedding_item.embeddings_cpu.element_size()} bytes"
                    )
                    tensors = {"ec_cache": embedding_item.embeddings_cpu}
-                    safetensors.torch.save_file(
+                    cache_path = os.path.join(
-                        tensors,
+                        self._cache_dir, f"{embedding_item.key}.safetensors"
-                        f"/tmp/encoder_cache.{embedding_item.key}.safetensors",
                    )
+                    safetensors.torch.save_file(tensors, cache_path)
                    # [gluo FIXME] need mechanism to clean up local files
-                    request.multimodal_inputs[
+                    request.multimodal_inputs[idx].serialized_request = cache_path
-                        idx
-                    ].serialized_request = (
-                        f"/tmp/encoder_cache.{embedding_item.key}.safetensors"
-                    )
                else:
                    descriptor = connect.Descriptor(embedding_item.embeddings_cpu)
                    self.readables.append(

--- a/deploy/utils/dynamo_deployment.py
+++ b/deploy/utils/dynamo_deployment.py
@@ -20,6 +20,7 @@ import re
 import socket
 import subprocess
 import sys
+import tempfile
 import time
 import uuid
 from pathlib import Path
@@ -546,8 +547,8 @@ async def main():
    parser.add_argument(
        "--log-dir",
        "-l",
-        default="/tmp/dynamo_logs",
+        default=os.path.join(tempfile.gettempdir(), "dynamo_logs"),
-        help="Base directory for logs (default: /tmp/dynamo_logs)",
+        help=f"Base directory for logs (default: {tempfile.gettempdir()}/dynamo_logs)",
    )
    parser.add_argument(
        "--service-name",

--- a/lib/gpu_memory_service/common/utils.py
+++ b/lib/gpu_memory_service/common/utils.py
@@ -3,6 +3,9 @@
 """Shared utilities for GPU Memory Service."""
+import os
+import tempfile
 import pynvml
@@ -16,7 +19,7 @@ def get_socket_path(device: int) -> str:
        device: CUDA device index.
    Returns:
-        Socket path (e.g., "/tmp/gms_GPU-12345678-1234-1234-1234-123456789abc.sock").
+        Socket path (e.g., "<tempdir>/gms_GPU-12345678-1234-1234-1234-123456789abc.sock").
    """
    pynvml.nvmlInit()
    try:
@@ -24,4 +27,4 @@ def get_socket_path(device: int) -> str:
        uuid = pynvml.nvmlDeviceGetUUID(handle)
    finally:
        pynvml.nvmlShutdown()
-    return f"/tmp/gms_{uuid}.sock"
+    return os.path.join(tempfile.gettempdir(), f"gms_{uuid}.sock")