Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
334d64d1
Unverified
Commit
334d64d1
authored
Nov 26, 2024
by
youkaichao
Committed by
GitHub
Nov 26, 2024
Browse files
[ci] add vllm_test_utils (#10659)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
94063534
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
113 additions
and
61 deletions
+113
-61
Dockerfile
Dockerfile
+4
-0
Dockerfile.cpu
Dockerfile.cpu
+4
-0
Dockerfile.hpu
Dockerfile.hpu
+3
-0
Dockerfile.neuron
Dockerfile.neuron
+3
-0
Dockerfile.openvino
Dockerfile.openvino
+3
-0
Dockerfile.ppc64le
Dockerfile.ppc64le
+3
-0
Dockerfile.rocm
Dockerfile.rocm
+3
-0
Dockerfile.tpu
Dockerfile.tpu
+3
-0
Dockerfile.xpu
Dockerfile.xpu
+2
-1
tests/entrypoints/llm/test_lazy_outlines.py
tests/entrypoints/llm/test_lazy_outlines.py
+16
-7
tests/test_lazy_torch_compile.py
tests/test_lazy_torch_compile.py
+1
-53
tests/vllm_test_utils/setup.py
tests/vllm_test_utils/setup.py
+7
-0
tests/vllm_test_utils/vllm_test_utils/__init__.py
tests/vllm_test_utils/vllm_test_utils/__init__.py
+8
-0
tests/vllm_test_utils/vllm_test_utils/blame.py
tests/vllm_test_utils/vllm_test_utils/blame.py
+53
-0
No files found.
Dockerfile
View file @
334d64d1
...
@@ -191,6 +191,10 @@ ADD . /vllm-workspace/
...
@@ -191,6 +191,10 @@ ADD . /vllm-workspace/
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-dev.txt
python3
-m
pip
install
-r
requirements-dev.txt
# install development dependencies (for testing)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-e
tests/vllm_test_utils
# enable fast downloads from hf (for testing)
# enable fast downloads from hf (for testing)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
hf_transfer
python3
-m
pip
install
hf_transfer
...
...
Dockerfile.cpu
View file @
334d64d1
...
@@ -62,4 +62,8 @@ WORKDIR /workspace/
...
@@ -62,4 +62,8 @@ WORKDIR /workspace/
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -e tests/vllm_test_utils
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Dockerfile.hpu
View file @
334d64d1
...
@@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
...
@@ -11,6 +11,9 @@ ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
WORKDIR /workspace/
WORKDIR /workspace/
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
...
...
Dockerfile.neuron
View file @
334d64d1
...
@@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron
...
@@ -38,4 +38,7 @@ ENV VLLM_TARGET_DEVICE neuron
RUN --mount=type=bind,source=.git,target=.git \
RUN --mount=type=bind,source=.git,target=.git \
pip install --no-build-isolation -v -e .
pip install --no-build-isolation -v -e .
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
CMD ["/bin/bash"]
CMD ["/bin/bash"]
Dockerfile.openvino
View file @
334d64d1
...
@@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC
...
@@ -22,4 +22,7 @@ RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" VLLM_TARGET_DEVIC
COPY examples/ /workspace/examples
COPY examples/ /workspace/examples
COPY benchmarks/ /workspace/benchmarks
COPY benchmarks/ /workspace/benchmarks
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
CMD ["/bin/bash"]
CMD ["/bin/bash"]
Dockerfile.ppc64le
View file @
334d64d1
...
@@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
...
@@ -29,6 +29,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
RUN --mount=type=bind,source=.git,target=.git \
RUN --mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py install
VLLM_TARGET_DEVICE=cpu python3 setup.py install
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
WORKDIR /workspace/
WORKDIR /workspace/
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
...
...
Dockerfile.rocm
View file @
334d64d1
...
@@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
...
@@ -168,4 +168,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
if ls libs/*.whl; then \
if ls libs/*.whl; then \
python3 -m pip install libs/*.whl; fi
python3 -m pip install libs/*.whl; fi
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
CMD ["/bin/bash"]
CMD ["/bin/bash"]
Dockerfile.tpu
View file @
334d64d1
...
@@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
...
@@ -22,4 +22,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
-r requirements-tpu.txt
-r requirements-tpu.txt
RUN python3 setup.py develop
RUN python3 setup.py develop
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
CMD ["/bin/bash"]
CMD ["/bin/bash"]
Dockerfile.xpu
View file @
334d64d1
...
@@ -64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
...
@@ -64,5 +64,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
ENV VLLM_USAGE_SOURCE production-docker-image \
ENV VLLM_USAGE_SOURCE production-docker-image \
TRITON_XPU_PROFILE 1
TRITON_XPU_PROFILE 1
# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
tests/entrypoints/llm/test_lazy_outlines.py
View file @
334d64d1
import
sys
import
sys
from
vllm_test_utils
import
blame
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.distributed
import
cleanup_dist_env_and_memory
def
test_lazy_outlines
(
sample_regex
):
def
run_normal
():
"""If users don't use guided decoding, outlines should not be imported.
"""
prompts
=
[
prompts
=
[
"Hello, my name is"
,
"Hello, my name is"
,
"The president of the United States is"
,
"The president of the United States is"
,
...
@@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex):
...
@@ -25,13 +25,12 @@ def test_lazy_outlines(sample_regex):
generated_text
=
output
.
outputs
[
0
].
text
generated_text
=
output
.
outputs
[
0
].
text
print
(
f
"Prompt:
{
prompt
!
r
}
, Generated text:
{
generated_text
!
r
}
"
)
print
(
f
"Prompt:
{
prompt
!
r
}
, Generated text:
{
generated_text
!
r
}
"
)
# make sure outlines is not imported
assert
'outlines'
not
in
sys
.
modules
# Destroy the LLM object and free up the GPU memory.
# Destroy the LLM object and free up the GPU memory.
del
llm
del
llm
cleanup_dist_env_and_memory
()
cleanup_dist_env_and_memory
()
def
run_lmfe
(
sample_regex
):
# Create an LLM with guided decoding enabled.
# Create an LLM with guided decoding enabled.
llm
=
LLM
(
model
=
"facebook/opt-125m"
,
llm
=
LLM
(
model
=
"facebook/opt-125m"
,
enforce_eager
=
True
,
enforce_eager
=
True
,
...
@@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex):
...
@@ -51,5 +50,15 @@ def test_lazy_outlines(sample_regex):
generated_text
=
output
.
outputs
[
0
].
text
generated_text
=
output
.
outputs
[
0
].
text
print
(
f
"Prompt:
{
prompt
!
r
}
, Generated text:
{
generated_text
!
r
}
"
)
print
(
f
"Prompt:
{
prompt
!
r
}
, Generated text:
{
generated_text
!
r
}
"
)
def
test_lazy_outlines
(
sample_regex
):
"""If users don't use guided decoding, outlines should not be imported.
"""
# make sure outlines is not imported
# make sure outlines is not imported
assert
'outlines'
not
in
sys
.
modules
module_name
=
"outlines"
with
blame
(
lambda
:
module_name
in
sys
.
modules
)
as
result
:
run_normal
()
run_lmfe
(
sample_regex
)
assert
not
result
.
found
,
(
f
"Module
{
module_name
}
is already imported, the"
f
" first import location is:
\n
{
result
.
trace_stack
}
"
)
tests/test_lazy_torch_compile.py
View file @
334d64d1
# Description: Test the lazy import module
# Description: Test the lazy import module
# The utility function cannot be placed in `vllm.utils`
# The utility function cannot be placed in `vllm.utils`
# this needs to be a standalone script
# this needs to be a standalone script
import
contextlib
import
dataclasses
import
sys
import
sys
import
traceback
from
typing
import
Callable
,
Generator
@
dataclasses
.
dataclass
class
BlameResult
:
found
:
bool
=
False
trace_stack
:
str
=
""
@
contextlib
.
contextmanager
def
blame
(
func
:
Callable
)
->
Generator
[
BlameResult
,
None
,
None
]:
"""
Trace the function calls to find the first function that satisfies the
condition. The trace stack will be stored in the result.
Usage:
```python
with blame(lambda: some_condition()) as result:
# do something
if result.found:
print(result.trace_stack)
"""
result
=
BlameResult
()
def
_trace_calls
(
frame
,
event
,
arg
=
None
):
nonlocal
result
if
event
in
[
'call'
,
'return'
]:
# for every function call or return
try
:
# Temporarily disable the trace function
sys
.
settrace
(
None
)
# check condition here
if
not
result
.
found
and
func
():
result
.
found
=
True
result
.
trace_stack
=
""
.
join
(
traceback
.
format_stack
())
# Re-enable the trace function
sys
.
settrace
(
_trace_calls
)
except
NameError
:
# modules are deleted during shutdown
pass
return
_trace_calls
sys
.
settrace
(
_trace_calls
)
yield
result
sys
.
settrace
(
None
)
from
vllm_test_utils
import
blame
module_name
=
"torch._inductor.async_compile"
module_name
=
"torch._inductor.async_compile"
...
...
tests/vllm_test_utils/setup.py
0 → 100644
View file @
334d64d1
from
setuptools
import
setup
# Packaging metadata for the vLLM test helper package; it ships exactly one
# package, `vllm_test_utils`.
setup(
    name='vllm_test_utils',
    version='0.1',
    packages=['vllm_test_utils'],
)
tests/vllm_test_utils/vllm_test_utils/__init__.py
0 → 100644
View file @
334d64d1
"""
vllm_test_utils is a package for vLLM testing utilities.
It does not import any vLLM modules.
"""
from
.blame
import
BlameResult
,
blame
__all__
=
[
"blame"
,
"BlameResult"
]
tests/vllm_test_utils/vllm_test_utils/blame.py
0 → 100644
View file @
334d64d1
import
contextlib
import
dataclasses
import
sys
import
traceback
from
typing
import
Callable
,
Generator
@dataclasses.dataclass
class BlameResult:
    """Outcome of a `blame` tracing session.

    An instance is created by `blame`, handed to the `with` body, and
    updated in place the first time the watched condition holds.
    """
    # True once the watched condition has been observed to hold.
    found: bool = False
    # Formatted call stack captured when the condition first held; stays
    # empty for as long as `found` is False.
    trace_stack: str = ""


@contextlib.contextmanager
def blame(func: Callable) -> Generator[BlameResult, None, None]:
    """
    Trace the function calls to find the first function that satisfies the
    condition. The trace stack will be stored in the result.

    The condition `func` is evaluated on every Python-level function call
    and return while the `with` body runs. The first time it returns a
    truthy value, `result.found` is set and the current call stack is
    stored in `result.trace_stack`; later events no longer evaluate it.

    Args:
        func: Zero-argument callable returning a truthy value once the
            condition of interest holds.

    Yields:
        The `BlameResult` that is filled in while tracing is active.

    Usage:

    ```python
    with blame(lambda: some_condition()) as result:
        # do something

    if result.found:
        print(result.trace_stack)
    ```
    """
    result = BlameResult()
    # Remember the tracer that was active on entry (debugger, coverage, or
    # None) so it can be put back instead of being clobbered.
    previous_trace = sys.gettrace()

    def _trace_calls(frame, event, arg=None):
        if event in ('call', 'return'):
            # for every function call or return
            try:
                # Temporarily disable the trace function so that evaluating
                # the condition is not itself traced.
                sys.settrace(None)
                # check condition here
                if not result.found and func():
                    result.found = True
                    result.trace_stack = "".join(traceback.format_stack())
                # Re-enable the trace function
                sys.settrace(_trace_calls)
            except NameError:
                # modules are deleted during shutdown
                pass
        return _trace_calls

    sys.settrace(_trace_calls)
    try:
        yield result
    finally:
        # Always uninstall our tracer, even when the body raises; otherwise
        # every later call in the process would keep being traced.
        sys.settrace(previous_trace)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment