Unverified commit ca2b628b, authored by Huazhong Ji and committed by GitHub
Browse files

[MISC] rename CudaMemoryProfiler to DeviceMemoryProfiler (#8703)

parent 8ca5051b
...@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool: ...@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool:
return True return True
class CudaMemoryProfiler: class DeviceMemoryProfiler:
def __init__(self, device: Optional[torch.types.Device] = None): def __init__(self, device: Optional[torch.types.Device] = None):
self.device = device self.device = device
......
...@@ -45,7 +45,7 @@ from vllm.prompt_adapter.worker_manager import ( ...@@ -45,7 +45,7 @@ from vllm.prompt_adapter.worker_manager import (
LRUCacheWorkerPromptAdapterManager) LRUCacheWorkerPromptAdapterManager)
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
from vllm.utils import (CudaMemoryProfiler, PyObjectCache, async_tensor_h2d, from vllm.utils import (DeviceMemoryProfiler, PyObjectCache, async_tensor_h2d,
flatten_2d_lists, is_hip, is_pin_memory_available, flatten_2d_lists, is_hip, is_pin_memory_available,
supports_dynamo) supports_dynamo)
from vllm.worker.model_runner_base import ( from vllm.worker.model_runner_base import (
...@@ -1012,7 +1012,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]): ...@@ -1012,7 +1012,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
def load_model(self) -> None: def load_model(self) -> None:
logger.info("Starting to load model %s...", self.model_config.model) logger.info("Starting to load model %s...", self.model_config.model)
with CudaMemoryProfiler() as m: with DeviceMemoryProfiler() as m:
self.model = get_model(model_config=self.model_config, self.model = get_model(model_config=self.model_config,
device_config=self.device_config, device_config=self.device_config,
load_config=self.load_config, load_config=self.load_config,
......
...@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs, ...@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
MultiModalInputs, MultiModalRegistry) MultiModalInputs, MultiModalRegistry)
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.sequence import IntermediateTensors, SequenceGroupMetadata from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
from vllm.utils import CudaMemoryProfiler, make_tensor_with_pad from vllm.utils import DeviceMemoryProfiler, make_tensor_with_pad
from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata
from vllm.worker.model_runner_base import ( from vllm.worker.model_runner_base import (
ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase, ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
...@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]): ...@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
self.model: nn.Module # Set after init_Model self.model: nn.Module # Set after init_Model
def load_model(self) -> None: def load_model(self) -> None:
with CudaMemoryProfiler() as m: with DeviceMemoryProfiler() as m:
self.model = get_model( self.model = get_model(
model_config=self.model_config, model_config=self.model_config,
device_config=self.device_config, device_config=self.device_config,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment