Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55fb97f7
Unverified
Commit
55fb97f7
authored
Dec 26, 2024
by
Robert Shaw
Committed by
GitHub
Dec 26, 2024
Browse files
[2/N] API Server: Avoid ulimit footgun (#11530)
parent
2072924d
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
2 deletions
+26
-2
vllm/entrypoints/api_server.py
vllm/entrypoints/api_server.py
+3
-1
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+5
-1
vllm/utils.py
vllm/utils.py
+18
-0
No files found.
vllm/entrypoints/api_server.py
View file @
55fb97f7
...
...
@@ -21,7 +21,7 @@ from vllm.entrypoints.utils import with_cancellation
from
vllm.logger
import
init_logger
from
vllm.sampling_params
import
SamplingParams
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
FlexibleArgumentParser
,
random_uuid
from
vllm.utils
import
FlexibleArgumentParser
,
random_uuid
,
set_ulimit
from
vllm.version
import
__version__
as
VLLM_VERSION
logger
=
init_logger
(
"vllm.entrypoints.api_server"
)
...
...
@@ -119,6 +119,8 @@ async def run_server(args: Namespace,
logger
.
info
(
"vLLM API server version %s"
,
VLLM_VERSION
)
logger
.
info
(
"args: %s"
,
args
)
set_ulimit
()
app
=
await
init_app
(
args
,
llm_engine
)
assert
engine
is
not
None
...
...
vllm/entrypoints/openai/api_server.py
View file @
55fb97f7
...
...
@@ -68,7 +68,7 @@ from vllm.entrypoints.utils import with_cancellation
from
vllm.logger
import
init_logger
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
(
FlexibleArgumentParser
,
get_open_zmq_ipc_path
,
is_valid_ipv6_address
)
is_valid_ipv6_address
,
set_ulimit
)
from
vllm.version
import
__version__
as
VLLM_VERSION
TIMEOUT_KEEP_ALIVE
=
5
# seconds
...
...
@@ -727,6 +727,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
sock_addr
=
(
args
.
host
or
""
,
args
.
port
)
sock
=
create_server_socket
(
sock_addr
)
# workaround to avoid footguns where uvicorn drops requests with too
# many concurrent requests active
set_ulimit
()
def
signal_handler
(
*
_
)
->
None
:
# Interrupt server on sigterm while initializing
raise
KeyboardInterrupt
(
"terminated"
)
...
...
vllm/utils.py
View file @
55fb97f7
...
...
@@ -12,6 +12,7 @@ import inspect
import
ipaddress
import
os
import
re
import
resource
import
signal
import
socket
import
subprocess
...
...
@@ -1818,3 +1819,20 @@ def memory_profiling(
result
.
non_torch_increase_in_bytes
=
current_cuda_memory_bytes
-
baseline_memory_in_bytes
-
weights_memory_in_bytes
-
diff
.
torch_memory_in_bytes
# noqa
result
.
profile_time
=
diff
.
timestamp
result
.
non_kv_cache_memory_in_bytes
=
result
.
non_torch_increase_in_bytes
+
result
.
torch_peak_increase_in_bytes
+
result
.
weights_memory_in_bytes
# noqa
# Adapted from: https://github.com/sgl-project/sglang/blob/f46f394f4d4dbe4aae85403dec006199b34d2840/python/sglang/srt/utils.py#L630 # noqa: E501
def set_ulimit(target_soft_limit: int = 65535) -> None:
    """Raise the soft limit on open file descriptors, on a best-effort basis.

    Reads the current ``RLIMIT_NOFILE`` soft/hard limits and, when the soft
    limit is below ``target_soft_limit``, attempts to raise it to that value
    while passing the existing hard limit through unchanged. A failure to
    raise the limit is logged as a warning and never propagated, so callers
    can invoke this unconditionally during start-up.

    Args:
        target_soft_limit: Desired minimum soft limit for open file
            descriptors. Nothing is changed when the current soft limit is
            already at or above this value.
    """
    resource_type = resource.RLIMIT_NOFILE
    current_soft, current_hard = resource.getrlimit(resource_type)

    # Already high enough: leave the process limits untouched.
    if current_soft >= target_soft_limit:
        return

    try:
        resource.setrlimit(resource_type, (target_soft_limit, current_hard))
    except (ValueError, OSError) as e:
        # ValueError: e.g. the requested soft limit exceeds the hard limit.
        # OSError: the underlying setrlimit system call itself failed.
        # Either way this is only a convenience, so warn and carry on.
        logger.warning(
            "Found ulimit of %s and failed to automatically increase "
            "with error %s. This can cause fd limit errors like "
            "`OSError: [Errno 24] Too many open files`. Consider "
            "increasing with ulimit -n", current_soft, e)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment