Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ca2b628b
Unverified
Commit
ca2b628b
authored
Sep 23, 2024
by
Huazhong Ji
Committed by
GitHub
Sep 22, 2024
Browse files
[MISC] rename CudaMemoryProfiler to DeviceMemoryProfiler (#8703)
parent
8ca5051b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
5 deletions
+5
-5
vllm/utils.py
vllm/utils.py
+1
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+2
-2
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+2
-2
No files found.
vllm/utils.py
View file @
ca2b628b
...
@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool:
...
@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool:
return
True
return
True
class
Cuda
MemoryProfiler
:
class
Device
MemoryProfiler
:
def
__init__
(
self
,
device
:
Optional
[
torch
.
types
.
Device
]
=
None
):
def
__init__
(
self
,
device
:
Optional
[
torch
.
types
.
Device
]
=
None
):
self
.
device
=
device
self
.
device
=
device
...
...
vllm/worker/model_runner.py
View file @
ca2b628b
...
@@ -45,7 +45,7 @@ from vllm.prompt_adapter.worker_manager import (
...
@@ -45,7 +45,7 @@ from vllm.prompt_adapter.worker_manager import (
LRUCacheWorkerPromptAdapterManager
)
LRUCacheWorkerPromptAdapterManager
)
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.utils
import
(
Cuda
MemoryProfiler
,
PyObjectCache
,
async_tensor_h2d
,
from
vllm.utils
import
(
Device
MemoryProfiler
,
PyObjectCache
,
async_tensor_h2d
,
flatten_2d_lists
,
is_hip
,
is_pin_memory_available
,
flatten_2d_lists
,
is_hip
,
is_pin_memory_available
,
supports_dynamo
)
supports_dynamo
)
from
vllm.worker.model_runner_base
import
(
from
vllm.worker.model_runner_base
import
(
...
@@ -1012,7 +1012,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
...
@@ -1012,7 +1012,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
def
load_model
(
self
)
->
None
:
def
load_model
(
self
)
->
None
:
logger
.
info
(
"Starting to load model %s..."
,
self
.
model_config
.
model
)
logger
.
info
(
"Starting to load model %s..."
,
self
.
model_config
.
model
)
with
Cuda
MemoryProfiler
()
as
m
:
with
Device
MemoryProfiler
()
as
m
:
self
.
model
=
get_model
(
model_config
=
self
.
model_config
,
self
.
model
=
get_model
(
model_config
=
self
.
model_config
,
device_config
=
self
.
device_config
,
device_config
=
self
.
device_config
,
load_config
=
self
.
load_config
,
load_config
=
self
.
load_config
,
...
...
vllm/worker/xpu_model_runner.py
View file @
ca2b628b
...
@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
...
@@ -21,7 +21,7 @@ from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
MultiModalInputs
,
MultiModalRegistry
)
MultiModalInputs
,
MultiModalRegistry
)
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.utils
import
Cuda
MemoryProfiler
,
make_tensor_with_pad
from
vllm.utils
import
Device
MemoryProfiler
,
make_tensor_with_pad
from
vllm.worker.model_runner
import
AttentionMetadata
,
SamplingMetadata
from
vllm.worker.model_runner
import
AttentionMetadata
,
SamplingMetadata
from
vllm.worker.model_runner_base
import
(
from
vllm.worker.model_runner_base
import
(
ModelRunnerBase
,
ModelRunnerInputBase
,
ModelRunnerInputBuilderBase
,
ModelRunnerBase
,
ModelRunnerInputBase
,
ModelRunnerInputBuilderBase
,
...
@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
...
@@ -391,7 +391,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
self
.
model
:
nn
.
Module
# Set after init_Model
self
.
model
:
nn
.
Module
# Set after init_Model
def
load_model
(
self
)
->
None
:
def
load_model
(
self
)
->
None
:
with
Cuda
MemoryProfiler
()
as
m
:
with
Device
MemoryProfiler
()
as
m
:
self
.
model
=
get_model
(
self
.
model
=
get_model
(
model_config
=
self
.
model_config
,
model_config
=
self
.
model_config
,
device_config
=
self
.
device_config
,
device_config
=
self
.
device_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment