Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8dbe0c52
Unverified
Commit
8dbe0c52
authored
Oct 23, 2025
by
hfan
Committed by
GitHub
Oct 23, 2025
Browse files
[Misc] Add TPU usage report when using tpu_inference. (#27423)
Signed-off-by:
Hongmin Fan
<
fanhongmin@google.com
>
parent
5cc6bddb
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
10 deletions
+30
-10
vllm/usage/usage_lib.py
vllm/usage/usage_lib.py
+30
-10
No files found.
vllm/usage/usage_lib.py
View file @
8dbe0c52
...
@@ -176,6 +176,32 @@ class UsageMessage:
...
@@ -176,6 +176,32 @@ class UsageMessage:
self
.
_report_usage_once
(
model_architecture
,
usage_context
,
extra_kvs
)
self
.
_report_usage_once
(
model_architecture
,
usage_context
,
extra_kvs
)
self
.
_report_continuous_usage
()
self
.
_report_continuous_usage
()
def
_report_tpu_inference_usage
(
self
)
->
bool
:
try
:
from
tpu_inference
import
tpu_info
,
utils
self
.
gpu_count
=
tpu_info
.
get_num_chips
()
self
.
gpu_type
=
tpu_info
.
get_tpu_type
()
self
.
gpu_memory_per_device
=
utils
.
get_device_hbm_limit
()
self
.
cuda_runtime
=
"tpu_inference"
return
True
except
Exception
:
return
False
def
_report_torch_xla_usage
(
self
)
->
bool
:
try
:
import
torch_xla
self
.
gpu_count
=
torch_xla
.
runtime
.
world_size
()
self
.
gpu_type
=
torch_xla
.
tpu
.
get_tpu_type
()
self
.
gpu_memory_per_device
=
torch_xla
.
core
.
xla_model
.
get_memory_info
()[
"bytes_limit"
]
self
.
cuda_runtime
=
"torch_xla"
return
True
except
Exception
:
return
False
def
_report_usage_once
(
def
_report_usage_once
(
self
,
self
,
model_architecture
:
str
,
model_architecture
:
str
,
...
@@ -192,16 +218,10 @@ class UsageMessage:
...
@@ -192,16 +218,10 @@ class UsageMessage:
)
)
if
current_platform
.
is_cuda
():
if
current_platform
.
is_cuda
():
self
.
cuda_runtime
=
torch
.
version
.
cuda
self
.
cuda_runtime
=
torch
.
version
.
cuda
if
current_platform
.
is_tpu
():
if
current_platform
.
is_tpu
():
# noqa: SIM102
try
:
if
(
not
self
.
_report_tpu_inference_usage
())
and
(
import
torch_xla
not
self
.
_report_torch_xla_usage
()
):
self
.
gpu_count
=
torch_xla
.
runtime
.
world_size
()
self
.
gpu_type
=
torch_xla
.
tpu
.
get_tpu_type
()
self
.
gpu_memory_per_device
=
torch_xla
.
core
.
xla_model
.
get_memory_info
()[
"bytes_limit"
]
except
Exception
:
logger
.
exception
(
"Failed to collect TPU information"
)
logger
.
exception
(
"Failed to collect TPU information"
)
self
.
provider
=
_detect_cloud_provider
()
self
.
provider
=
_detect_cloud_provider
()
self
.
architecture
=
platform
.
machine
()
self
.
architecture
=
platform
.
machine
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment