Unverified commit 0ef41ffe, authored by dagil-nvidia, committed by GitHub
Browse files

refactor: use tempfile module instead of hardcoded /tmp paths (#5789)


Signed-off-by: Dan Gil <dagil@nvidia.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
parent 3c9ca3fc
...@@ -5,6 +5,7 @@ import asyncio ...@@ -5,6 +5,7 @@ import asyncio
import logging import logging
import os import os
import shutil import shutil
import tempfile
import time import time
from dataclasses import dataclass from dataclasses import dataclass
from typing import AsyncGenerator, AsyncIterator from typing import AsyncGenerator, AsyncIterator
...@@ -86,6 +87,10 @@ class EncodeWorkerHandler: ...@@ -86,6 +87,10 @@ class EncodeWorkerHandler:
self.readables = [] self.readables = []
self.embedding_cache = EmbeddingCache() self.embedding_cache = EmbeddingCache()
# Use system temp directory for encoder cache files
self._cache_dir = os.path.join(tempfile.gettempdir(), "encoder_cache")
os.makedirs(self._cache_dir, exist_ok=True)
def cleanup(self): def cleanup(self):
pass pass
...@@ -240,16 +245,12 @@ class EncodeWorkerHandler: ...@@ -240,16 +245,12 @@ class EncodeWorkerHandler:
f"ENCODER: saving local safetensors file with key {embedding_item.key}, {embedding_item.embeddings_cpu.numel()} * {embedding_item.embeddings_cpu.element_size()} bytes" f"ENCODER: saving local safetensors file with key {embedding_item.key}, {embedding_item.embeddings_cpu.numel()} * {embedding_item.embeddings_cpu.element_size()} bytes"
) )
tensors = {"ec_cache": embedding_item.embeddings_cpu} tensors = {"ec_cache": embedding_item.embeddings_cpu}
safetensors.torch.save_file( cache_path = os.path.join(
tensors, self._cache_dir, f"{embedding_item.key}.safetensors"
f"/tmp/encoder_cache.{embedding_item.key}.safetensors",
) )
safetensors.torch.save_file(tensors, cache_path)
# [gluo FIXME] need mechanism to clean up local files # [gluo FIXME] need mechanism to clean up local files
request.multimodal_inputs[ request.multimodal_inputs[idx].serialized_request = cache_path
idx
].serialized_request = (
f"/tmp/encoder_cache.{embedding_item.key}.safetensors"
)
else: else:
descriptor = connect.Descriptor(embedding_item.embeddings_cpu) descriptor = connect.Descriptor(embedding_item.embeddings_cpu)
self.readables.append( self.readables.append(
......
...@@ -20,6 +20,7 @@ import re ...@@ -20,6 +20,7 @@ import re
import socket import socket
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
import uuid import uuid
from pathlib import Path from pathlib import Path
...@@ -546,8 +547,8 @@ async def main(): ...@@ -546,8 +547,8 @@ async def main():
parser.add_argument( parser.add_argument(
"--log-dir", "--log-dir",
"-l", "-l",
default="/tmp/dynamo_logs", default=os.path.join(tempfile.gettempdir(), "dynamo_logs"),
help="Base directory for logs (default: /tmp/dynamo_logs)", help=f"Base directory for logs (default: {tempfile.gettempdir()}/dynamo_logs)",
) )
parser.add_argument( parser.add_argument(
"--service-name", "--service-name",
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
"""Shared utilities for GPU Memory Service.""" """Shared utilities for GPU Memory Service."""
import os
import tempfile
import pynvml import pynvml
...@@ -16,7 +19,7 @@ def get_socket_path(device: int) -> str: ...@@ -16,7 +19,7 @@ def get_socket_path(device: int) -> str:
device: CUDA device index. device: CUDA device index.
Returns: Returns:
Socket path (e.g., "/tmp/gms_GPU-12345678-1234-1234-1234-123456789abc.sock"). Socket path (e.g., "<tempdir>/gms_GPU-12345678-1234-1234-1234-123456789abc.sock").
""" """
pynvml.nvmlInit() pynvml.nvmlInit()
try: try:
...@@ -24,4 +27,4 @@ def get_socket_path(device: int) -> str: ...@@ -24,4 +27,4 @@ def get_socket_path(device: int) -> str:
uuid = pynvml.nvmlDeviceGetUUID(handle) uuid = pynvml.nvmlDeviceGetUUID(handle)
finally: finally:
pynvml.nvmlShutdown() pynvml.nvmlShutdown()
return f"/tmp/gms_{uuid}.sock" return os.path.join(tempfile.gettempdir(), f"gms_{uuid}.sock")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment