Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
80694951
"vscode:/vscode.git/clone" did not exist on "34553b9d2702dd2a27a578fec819e88e76dcbfb4"
Unverified
Commit
80694951
authored
Aug 02, 2024
by
youkaichao
Committed by
GitHub
Aug 02, 2024
Browse files
[ci] set timeout for test_oot_registration.py (#7082)
parent
c16eaac5
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
2 deletions
+10
-2
tests/entrypoints/openai/test_oot_registration.py
tests/entrypoints/openai/test_oot_registration.py
+4
-0
vllm/worker/worker.py
vllm/worker/worker.py
+3
-1
vllm/worker/xpu_worker.py
vllm/worker/xpu_worker.py
+3
-1
No files found.
tests/entrypoints/openai/test_oot_registration.py
View file @
80694951
...
...
@@ -36,10 +36,12 @@ def test_oot_registration_for_api_server():
ctx
=
torch
.
multiprocessing
.
get_context
()
server
=
ctx
.
Process
(
target
=
server_function
,
args
=
(
port
,
))
server
.
start
()
MAX_SERVER_START_WAIT_S
=
60
client
=
OpenAI
(
base_url
=
f
"http://localhost:
{
port
}
/v1"
,
api_key
=
"token-abc123"
,
)
now
=
time
.
time
()
while
True
:
try
:
completion
=
client
.
chat
.
completions
.
create
(
...
...
@@ -57,6 +59,8 @@ def test_oot_registration_for_api_server():
except
OpenAIError
as
e
:
if
"Connection error"
in
str
(
e
):
time
.
sleep
(
3
)
if
time
.
time
()
-
now
>
MAX_SERVER_START_WAIT_S
:
raise
RuntimeError
(
"Server did not start in time"
)
from
e
else
:
raise
e
server
.
kill
()
...
...
vllm/worker/worker.py
View file @
80694951
...
...
@@ -186,7 +186,9 @@ class Worker(LocalOrDistributedWorkerBase):
# GPU did not change their memory usage during the profiling.
peak_memory
=
self
.
init_gpu_memory
-
free_gpu_memory
assert
peak_memory
>
0
,
(
"Error in memory profiling. This happens when the GPU memory was "
"Error in memory profiling. "
f
"Initial free memory
{
self
.
init_gpu_memory
}
, current free memory"
f
"
{
free_gpu_memory
}
. This happens when the GPU memory was "
"not properly cleaned up before initializing the vLLM instance."
)
cache_block_size
=
self
.
get_cache_block_size_bytes
()
...
...
vllm/worker/xpu_worker.py
View file @
80694951
...
...
@@ -138,7 +138,9 @@ class XPUWorker(LoraNotSupportedWorkerBase, Worker):
# GPU did not change their memory usage during the profiling.
peak_memory
=
self
.
init_gpu_memory
-
free_gpu_memory
assert
peak_memory
>
0
,
(
"Error in memory profiling. This happens when the GPU memory was "
"Error in memory profiling. "
f
"Initial free memory
{
self
.
init_gpu_memory
}
, current free memory"
f
"
{
free_gpu_memory
}
. This happens when the GPU memory was "
"not properly cleaned up before initializing the vLLM instance."
)
cache_block_size
=
self
.
get_cache_block_size_bytes
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment