Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
21063c11
Unverified
Commit
21063c11
authored
Nov 06, 2024
by
Aaron Pham
Committed by
GitHub
Nov 06, 2024
Browse files
[CI/Build] drop support for Python 3.8 EOL (#8464)
Signed-off-by:
Aaron Pham
<
contact@aarnphm.xyz
>
parent
4be3a451
Changes
115
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
105 additions
and
130 deletions
+105
-130
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
...ly-benchmarks/scripts/convert-results-json-to-markdown.py
+5
-5
.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
...e/nightly-benchmarks/scripts/generate-nightly-markdown.py
+2
-2
.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
...ite/nightly-benchmarks/scripts/summary-nightly-results.py
+2
-2
.github/workflows/mypy.yaml
.github/workflows/mypy.yaml
+1
-1
.github/workflows/publish.yml
.github/workflows/publish.yml
+1
-1
.github/workflows/ruff.yml
.github/workflows/ruff.yml
+16
-16
.github/workflows/yapf.yml
.github/workflows/yapf.yml
+13
-13
.readthedocs.yaml
.readthedocs.yaml
+5
-6
CMakeLists.txt
CMakeLists.txt
+18
-18
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+7
-15
benchmarks/kernels/benchmark_machete.py
benchmarks/kernels/benchmark_machete.py
+3
-3
csrc/quantization/machete/generate.py
csrc/quantization/machete/generate.py
+4
-4
docs/source/getting_started/installation.rst
docs/source/getting_started/installation.rst
+5
-5
pyproject.toml
pyproject.toml
+2
-2
setup.py
setup.py
+4
-5
tests/compile/piecewise/test_toy_llama.py
tests/compile/piecewise/test_toy_llama.py
+2
-2
tests/conftest.py
tests/conftest.py
+8
-21
tests/core/block/test_prefix_caching_block.py
tests/core/block/test_prefix_caching_block.py
+5
-7
tests/kernels/test_mamba_ssm.py
tests/kernels/test_mamba_ssm.py
+1
-1
tests/models/decoder_only/vision_language/mm_processor_kwargs/test_qwen.py
...der_only/vision_language/mm_processor_kwargs/test_qwen.py
+1
-1
No files found.
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
View file @
21063c11
...
@@ -56,7 +56,7 @@ serving_column_mapping = {
...
@@ -56,7 +56,7 @@ serving_column_mapping = {
def
read_markdown
(
file
):
def
read_markdown
(
file
):
if
os
.
path
.
exists
(
file
):
if
os
.
path
.
exists
(
file
):
with
open
(
file
,
"r"
)
as
f
:
with
open
(
file
)
as
f
:
return
f
.
read
()
+
"
\n
"
return
f
.
read
()
+
"
\n
"
else
:
else
:
return
f
"
{
file
}
not found.
\n
"
return
f
"
{
file
}
not found.
\n
"
...
@@ -75,14 +75,14 @@ if __name__ == "__main__":
...
@@ -75,14 +75,14 @@ if __name__ == "__main__":
# collect results
# collect results
for
test_file
in
results_folder
.
glob
(
"*.json"
):
for
test_file
in
results_folder
.
glob
(
"*.json"
):
with
open
(
test_file
,
"r"
)
as
f
:
with
open
(
test_file
)
as
f
:
raw_result
=
json
.
loads
(
f
.
read
())
raw_result
=
json
.
loads
(
f
.
read
())
if
"serving"
in
str
(
test_file
):
if
"serving"
in
str
(
test_file
):
# this result is generated via `benchmark_serving.py`
# this result is generated via `benchmark_serving.py`
# attach the benchmarking command to raw_result
# attach the benchmarking command to raw_result
with
open
(
test_file
.
with_suffix
(
".commands"
)
,
"r"
)
as
f
:
with
open
(
test_file
.
with_suffix
(
".commands"
))
as
f
:
command
=
json
.
loads
(
f
.
read
())
command
=
json
.
loads
(
f
.
read
())
raw_result
.
update
(
command
)
raw_result
.
update
(
command
)
...
@@ -97,7 +97,7 @@ if __name__ == "__main__":
...
@@ -97,7 +97,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_latency.py`
# this result is generated via `benchmark_latency.py`
# attach the benchmarking command to raw_result
# attach the benchmarking command to raw_result
with
open
(
test_file
.
with_suffix
(
".commands"
)
,
"r"
)
as
f
:
with
open
(
test_file
.
with_suffix
(
".commands"
))
as
f
:
command
=
json
.
loads
(
f
.
read
())
command
=
json
.
loads
(
f
.
read
())
raw_result
.
update
(
command
)
raw_result
.
update
(
command
)
...
@@ -119,7 +119,7 @@ if __name__ == "__main__":
...
@@ -119,7 +119,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_throughput.py`
# this result is generated via `benchmark_throughput.py`
# attach the benchmarking command to raw_result
# attach the benchmarking command to raw_result
with
open
(
test_file
.
with_suffix
(
".commands"
)
,
"r"
)
as
f
:
with
open
(
test_file
.
with_suffix
(
".commands"
))
as
f
:
command
=
json
.
loads
(
f
.
read
())
command
=
json
.
loads
(
f
.
read
())
raw_result
.
update
(
command
)
raw_result
.
update
(
command
)
...
...
.buildkite/nightly-benchmarks/scripts/generate-nightly-markdown.py
View file @
21063c11
...
@@ -72,7 +72,7 @@ def main(args):
...
@@ -72,7 +72,7 @@ def main(args):
# collect results
# collect results
for
test_file
in
results_folder
.
glob
(
"*_nightly_results.json"
):
for
test_file
in
results_folder
.
glob
(
"*_nightly_results.json"
):
with
open
(
test_file
,
"r"
)
as
f
:
with
open
(
test_file
)
as
f
:
results
=
results
+
json
.
loads
(
f
.
read
())
results
=
results
+
json
.
loads
(
f
.
read
())
# generate markdown table
# generate markdown table
...
@@ -80,7 +80,7 @@ def main(args):
...
@@ -80,7 +80,7 @@ def main(args):
md_table
=
tabulate
(
df
,
headers
=
'keys'
,
tablefmt
=
'pipe'
,
showindex
=
False
)
md_table
=
tabulate
(
df
,
headers
=
'keys'
,
tablefmt
=
'pipe'
,
showindex
=
False
)
with
open
(
args
.
description
,
"r"
)
as
f
:
with
open
(
args
.
description
)
as
f
:
description
=
f
.
read
()
description
=
f
.
read
()
description
=
description
.
format
(
description
=
description
.
format
(
...
...
.buildkite/nightly-benchmarks/scripts/summary-nightly-results.py
View file @
21063c11
...
@@ -36,11 +36,11 @@ if __name__ == "__main__":
...
@@ -36,11 +36,11 @@ if __name__ == "__main__":
# collect results
# collect results
for
test_file
in
results_folder
.
glob
(
"*.json"
):
for
test_file
in
results_folder
.
glob
(
"*.json"
):
with
open
(
test_file
,
"r"
)
as
f
:
with
open
(
test_file
)
as
f
:
raw_result
=
json
.
loads
(
f
.
read
())
raw_result
=
json
.
loads
(
f
.
read
())
# attach the benchmarking command to raw_result
# attach the benchmarking command to raw_result
with
open
(
test_file
.
with_suffix
(
".commands"
)
,
"r"
)
as
f
:
with
open
(
test_file
.
with_suffix
(
".commands"
))
as
f
:
command
=
json
.
loads
(
f
.
read
())
command
=
json
.
loads
(
f
.
read
())
raw_result
.
update
(
command
)
raw_result
.
update
(
command
)
...
...
.github/workflows/mypy.yaml
View file @
21063c11
...
@@ -25,7 +25,7 @@ jobs:
...
@@ -25,7 +25,7 @@ jobs:
runs-on
:
ubuntu-latest
runs-on
:
ubuntu-latest
strategy
:
strategy
:
matrix
:
matrix
:
python-version
:
[
"
3.8"
,
"
3.9"
,
"
3.10"
,
"
3.11"
,
"
3.12"
]
python-version
:
[
"
3.9"
,
"
3.10"
,
"
3.11"
,
"
3.12"
]
steps
:
steps
:
-
uses
:
actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
# v4.2.1
-
uses
:
actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871
# v4.2.1
-
name
:
Set up Python ${{ matrix.python-version }}
-
name
:
Set up Python ${{ matrix.python-version }}
...
...
.github/workflows/publish.yml
View file @
21063c11
...
@@ -48,7 +48,7 @@ jobs:
...
@@ -48,7 +48,7 @@ jobs:
fail-fast
:
false
fail-fast
:
false
matrix
:
matrix
:
os
:
[
'
ubuntu-20.04'
]
os
:
[
'
ubuntu-20.04'
]
python-version
:
[
'
3.8'
,
'
3.9'
,
'
3.10'
,
'
3.11'
,
'
3.12'
]
python-version
:
[
'
3.9'
,
'
3.10'
,
'
3.11'
,
'
3.12'
]
pytorch-version
:
[
'
2.4.0'
]
# Must be the most recent version that meets requirements-cuda.txt.
pytorch-version
:
[
'
2.4.0'
]
# Must be the most recent version that meets requirements-cuda.txt.
cuda-version
:
[
'
11.8'
,
'
12.1'
]
cuda-version
:
[
'
11.8'
,
'
12.1'
]
...
...
.github/workflows/ruff.yml
View file @
21063c11
.github/workflows/yapf.yml
View file @
21063c11
.readthedocs.yaml
View file @
21063c11
...
@@ -6,7 +6,7 @@ version: 2
...
@@ -6,7 +6,7 @@ version: 2
build
:
build
:
os
:
ubuntu-22.04
os
:
ubuntu-22.04
tools
:
tools
:
python
:
"
3.
8"
python
:
'
3.
9'
sphinx
:
sphinx
:
configuration
:
docs/source/conf.py
configuration
:
docs/source/conf.py
...
@@ -19,4 +19,3 @@ formats: []
...
@@ -19,4 +19,3 @@ formats: []
python
:
python
:
install
:
install
:
-
requirements
:
docs/requirements-docs.txt
-
requirements
:
docs/requirements-docs.txt
CMakeLists.txt
View file @
21063c11
benchmarks/backend_request_func.py
View file @
21063c11
...
@@ -79,7 +79,7 @@ async def async_request_tgi(
...
@@ -79,7 +79,7 @@ async def async_request_tgi(
# any data, we should skip it.
# any data, we should skip it.
if
chunk_bytes
.
startswith
(
":"
):
if
chunk_bytes
.
startswith
(
":"
):
continue
continue
chunk
=
remove
_
prefix
(
chunk_bytes
,
"data:"
)
chunk
=
chunk_bytes
.
removeprefix
(
"data:"
)
data
=
json
.
loads
(
chunk
)
data
=
json
.
loads
(
chunk
)
timestamp
=
time
.
perf_counter
()
timestamp
=
time
.
perf_counter
()
...
@@ -144,7 +144,7 @@ async def async_request_trt_llm(
...
@@ -144,7 +144,7 @@ async def async_request_trt_llm(
if
not
chunk_bytes
:
if
not
chunk_bytes
:
continue
continue
chunk
=
remove_prefix
(
chunk_bytes
.
decode
(
"utf-8"
)
,
chunk
=
chunk_bytes
.
decode
(
"utf-8"
)
.
removeprefix
(
"data:"
)
"data:"
)
data
=
json
.
loads
(
chunk
)
data
=
json
.
loads
(
chunk
)
...
@@ -261,7 +261,7 @@ async def async_request_openai_completions(
...
@@ -261,7 +261,7 @@ async def async_request_openai_completions(
if
not
chunk_bytes
:
if
not
chunk_bytes
:
continue
continue
chunk
=
remove_prefix
(
chunk_bytes
.
decode
(
"utf-8"
)
,
chunk
=
chunk_bytes
.
decode
(
"utf-8"
)
.
removeprefix
(
"data: "
)
"data: "
)
if
chunk
==
"[DONE]"
:
if
chunk
==
"[DONE]"
:
latency
=
time
.
perf_counter
()
-
st
latency
=
time
.
perf_counter
()
-
st
...
@@ -349,7 +349,7 @@ async def async_request_openai_chat_completions(
...
@@ -349,7 +349,7 @@ async def async_request_openai_chat_completions(
if
not
chunk_bytes
:
if
not
chunk_bytes
:
continue
continue
chunk
=
remove_prefix
(
chunk_bytes
.
decode
(
"utf-8"
)
,
chunk
=
chunk_bytes
.
decode
(
"utf-8"
)
.
removeprefix
(
"data: "
)
"data: "
)
if
chunk
==
"[DONE]"
:
if
chunk
==
"[DONE]"
:
latency
=
time
.
perf_counter
()
-
st
latency
=
time
.
perf_counter
()
-
st
...
@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions(
...
@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions(
return
output
return
output
# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix)
# introduced in Python 3.9
def
remove_prefix
(
text
:
str
,
prefix
:
str
)
->
str
:
if
text
.
startswith
(
prefix
):
return
text
[
len
(
prefix
):]
return
text
def
get_model
(
pretrained_model_name_or_path
:
str
)
->
str
:
def
get_model
(
pretrained_model_name_or_path
:
str
)
->
str
:
if
os
.
getenv
(
'VLLM_USE_MODELSCOPE'
,
'False'
).
lower
()
==
'true'
:
if
os
.
getenv
(
'VLLM_USE_MODELSCOPE'
,
'False'
).
lower
()
==
'true'
:
from
modelscope
import
snapshot_download
from
modelscope
import
snapshot_download
...
...
benchmarks/kernels/benchmark_machete.py
View file @
21063c11
...
@@ -269,10 +269,10 @@ def run_square_bench(args):
...
@@ -269,10 +269,10 @@ def run_square_bench(args):
def
run_range_bench
(
args
):
def
run_range_bench
(
args
):
m_start
,
k_start
,
n_start
=
[
int
(
x
)
for
x
in
args
.
dim_start
.
split
(
","
)
]
m_start
,
k_start
,
n_start
=
(
int
(
x
)
for
x
in
args
.
dim_start
.
split
(
","
)
)
m_end
,
k_end
,
n_end
=
[
int
(
x
)
for
x
in
args
.
dim_end
.
split
(
","
)
]
m_end
,
k_end
,
n_end
=
(
int
(
x
)
for
x
in
args
.
dim_end
.
split
(
","
)
)
m_increment
,
k_increment
,
n_increment
=
\
m_increment
,
k_increment
,
n_increment
=
\
[
int
(
x
)
for
x
in
args
.
dim_increment
.
split
(
","
)
]
(
int
(
x
)
for
x
in
args
.
dim_increment
.
split
(
","
)
)
Ms
=
list
(
range
(
m_start
,
m_end
+
1
,
m_increment
))
Ms
=
list
(
range
(
m_start
,
m_end
+
1
,
m_increment
))
Ks
=
list
(
range
(
k_start
,
k_end
+
1
,
k_increment
))
Ks
=
list
(
range
(
k_start
,
k_end
+
1
,
k_increment
))
Ns
=
list
(
range
(
n_start
,
n_end
+
1
,
n_increment
))
Ns
=
list
(
range
(
n_start
,
n_end
+
1
,
n_increment
))
...
...
csrc/quantization/machete/generate.py
View file @
21063c11
...
@@ -468,7 +468,7 @@ def generate():
...
@@ -468,7 +468,7 @@ def generate():
impl_configs
=
[]
impl_configs
=
[]
GPTQ_kernel_type_configs
=
list
(
GPTQ_kernel_type_configs
=
list
(
(
TypeConfig
(
TypeConfig
(
element_a
=
element_a
,
element_a
=
element_a
,
element_b
=
element_b
,
element_b
=
element_b
,
element_b_scale
=
element_a
,
element_b_scale
=
element_a
,
...
@@ -476,7 +476,7 @@ def generate():
...
@@ -476,7 +476,7 @@ def generate():
element_d
=
element_a
,
element_d
=
element_a
,
accumulator
=
DataType
.
f32
,
accumulator
=
DataType
.
f32
,
)
for
element_b
in
(
VLLMDataType
.
u4b8
,
VLLMDataType
.
u8b128
)
)
for
element_b
in
(
VLLMDataType
.
u4b8
,
VLLMDataType
.
u8b128
)
for
element_a
in
(
DataType
.
f16
,
DataType
.
bf16
))
)
for
element_a
in
(
DataType
.
f16
,
DataType
.
bf16
))
GPTQ_kernel_specializations
=
[
GPTQ_kernel_specializations
=
[
Specialization
(
with_C
=
False
,
with_zeropoints
=
False
,
with_scales
=
True
)
Specialization
(
with_C
=
False
,
with_zeropoints
=
False
,
with_scales
=
True
)
...
@@ -490,7 +490,7 @@ def generate():
...
@@ -490,7 +490,7 @@ def generate():
]
]
AWQ_kernel_type_configs
=
list
(
AWQ_kernel_type_configs
=
list
(
(
TypeConfig
(
TypeConfig
(
element_a
=
element_a
,
element_a
=
element_a
,
element_b
=
element_b
,
element_b
=
element_b
,
element_b_scale
=
element_a
,
element_b_scale
=
element_a
,
...
@@ -498,7 +498,7 @@ def generate():
...
@@ -498,7 +498,7 @@ def generate():
element_d
=
element_a
,
element_d
=
element_a
,
accumulator
=
DataType
.
f32
,
accumulator
=
DataType
.
f32
,
)
for
element_b
in
(
DataType
.
u4
,
DataType
.
u8
)
)
for
element_b
in
(
DataType
.
u4
,
DataType
.
u8
)
for
element_a
in
(
DataType
.
f16
,
DataType
.
bf16
))
)
for
element_a
in
(
DataType
.
f16
,
DataType
.
bf16
))
AWQ_kernel_specializations
=
[
AWQ_kernel_specializations
=
[
Specialization
(
with_C
=
False
,
with_zeropoints
=
True
,
with_scales
=
True
)
Specialization
(
with_C
=
False
,
with_zeropoints
=
True
,
with_scales
=
True
)
...
...
docs/source/getting_started/installation.rst
View file @
21063c11
...
@@ -10,7 +10,7 @@ Requirements
...
@@ -10,7 +10,7 @@ Requirements
============
============
* OS: Linux
* OS: Linux
* Python: 3.
8
- 3.12
* Python: 3.
9 -
- 3.12
* GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
* GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
Install released versions
Install released versions
...
...
pyproject.toml
View file @
21063c11
...
@@ -34,7 +34,7 @@ select = [
...
@@ -34,7 +34,7 @@ select = [
# Pyflakes
# Pyflakes
"F"
,
"F"
,
# pyupgrade
# pyupgrade
#
"UP",
"UP"
,
# flake8-bugbear
# flake8-bugbear
"B"
,
"B"
,
# flake8-simplify
# flake8-simplify
...
@@ -55,7 +55,7 @@ ignore = [
...
@@ -55,7 +55,7 @@ ignore = [
]
]
[tool.mypy]
[tool.mypy]
python_version
=
"3.
8
"
python_version
=
"3.
9
"
ignore_missing_imports
=
true
ignore_missing_imports
=
true
check_untyped_defs
=
true
check_untyped_defs
=
true
...
...
setup.py
View file @
21063c11
import
importlib.util
import
importlib.util
import
io
import
logging
import
logging
import
os
import
os
import
re
import
re
...
@@ -327,7 +326,7 @@ def get_neuronxcc_version():
...
@@ -327,7 +326,7 @@ def get_neuronxcc_version():
"__init__.py"
)
"__init__.py"
)
# Check if the command was executed successfully
# Check if the command was executed successfully
with
open
(
version_file
,
"rt"
)
as
fp
:
with
open
(
version_file
)
as
fp
:
content
=
fp
.
read
()
content
=
fp
.
read
()
# Extract the version using a regular expression
# Extract the version using a regular expression
...
@@ -404,7 +403,8 @@ def read_readme() -> str:
...
@@ -404,7 +403,8 @@ def read_readme() -> str:
"""Read the README file if present."""
"""Read the README file if present."""
p
=
get_path
(
"README.md"
)
p
=
get_path
(
"README.md"
)
if
os
.
path
.
isfile
(
p
):
if
os
.
path
.
isfile
(
p
):
return
io
.
open
(
get_path
(
"README.md"
),
"r"
,
encoding
=
"utf-8"
).
read
()
with
open
(
get_path
(
"README.md"
),
encoding
=
"utf-8"
)
as
f
:
return
f
.
read
()
else
:
else
:
return
""
return
""
...
@@ -498,7 +498,6 @@ setup(
...
@@ -498,7 +498,6 @@ setup(
"Documentation"
:
"https://vllm.readthedocs.io/en/latest/"
,
"Documentation"
:
"https://vllm.readthedocs.io/en/latest/"
,
},
},
classifiers
=
[
classifiers
=
[
"Programming Language :: Python :: 3.8"
,
"Programming Language :: Python :: 3.9"
,
"Programming Language :: Python :: 3.9"
,
"Programming Language :: Python :: 3.10"
,
"Programming Language :: Python :: 3.10"
,
"Programming Language :: Python :: 3.11"
,
"Programming Language :: Python :: 3.11"
,
...
@@ -512,7 +511,7 @@ setup(
...
@@ -512,7 +511,7 @@ setup(
],
],
packages
=
find_packages
(
exclude
=
(
"benchmarks"
,
"csrc"
,
"docs"
,
"examples"
,
packages
=
find_packages
(
exclude
=
(
"benchmarks"
,
"csrc"
,
"docs"
,
"examples"
,
"tests*"
)),
"tests*"
)),
python_requires
=
">=3.
8
"
,
python_requires
=
">=3.
9
"
,
install_requires
=
get_requirements
(),
install_requires
=
get_requirements
(),
ext_modules
=
ext_modules
,
ext_modules
=
ext_modules
,
extras_require
=
{
extras_require
=
{
...
...
tests/compile/piecewise/test_toy_llama.py
View file @
21063c11
...
@@ -429,8 +429,8 @@ def benchmark():
...
@@ -429,8 +429,8 @@ def benchmark():
# print in tabular format
# print in tabular format
print
(
"batch size
\t
eager mode
\t
full cudagraph
\t
piecewise cudagraph"
)
print
(
"batch size
\t
eager mode
\t
full cudagraph
\t
piecewise cudagraph"
)
for
b
in
cudagraph_sizes
:
for
b
in
cudagraph_sizes
:
print
(
(
f
"
{
b
}
\t
{
eager_time
[
b
]:.
3
f
}
\t
{
full_cudagraph_time
[
b
]:.
3
f
}
"
print
(
f
"
{
b
}
\t
{
eager_time
[
b
]:.
3
f
}
\t
{
full_cudagraph_time
[
b
]:.
3
f
}
"
f
"
\t
{
piecewise_cudagraph_time
[
b
]:.
3
f
}
"
)
)
f
"
\t
{
piecewise_cudagraph_time
[
b
]:.
3
f
}
"
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
tests/conftest.py
View file @
21063c11
import
json
import
json
import
os
import
os
import
sys
import
tempfile
import
tempfile
from
collections
import
UserList
from
collections
import
UserList
from
enum
import
Enum
from
enum
import
Enum
...
@@ -52,7 +51,7 @@ PromptVideoInput = _PromptMultiModalInput[np.ndarray]
...
@@ -52,7 +51,7 @@ PromptVideoInput = _PromptMultiModalInput[np.ndarray]
def
_read_prompts
(
filename
:
str
)
->
List
[
str
]:
def
_read_prompts
(
filename
:
str
)
->
List
[
str
]:
with
open
(
filename
,
"r"
)
as
f
:
with
open
(
filename
)
as
f
:
prompts
=
f
.
readlines
()
prompts
=
f
.
readlines
()
return
prompts
return
prompts
...
@@ -62,13 +61,7 @@ class _ImageAssetPrompts(TypedDict):
...
@@ -62,13 +61,7 @@ class _ImageAssetPrompts(TypedDict):
cherry_blossom
:
str
cherry_blossom
:
str
if
sys
.
version_info
<
(
3
,
9
):
class
_ImageAssetsBase
(
UserList
[
ImageAsset
]):
# UserList cannot be subscripted
class
_ImageAssetsBase
(
UserList
):
pass
else
:
class
_ImageAssetsBase
(
UserList
[
ImageAsset
]):
pass
pass
...
@@ -94,13 +87,7 @@ class _VideoAssetPrompts(TypedDict):
...
@@ -94,13 +87,7 @@ class _VideoAssetPrompts(TypedDict):
sample_demo_1
:
str
sample_demo_1
:
str
if
sys
.
version_info
<
(
3
,
9
):
class
_VideoAssetsBase
(
UserList
[
VideoAsset
]):
# UserList cannot be subscripted
class
_VideoAssetsBase
(
UserList
):
pass
else
:
class
_VideoAssetsBase
(
UserList
[
VideoAsset
]):
pass
pass
...
@@ -958,7 +945,7 @@ def dummy_opt_path():
...
@@ -958,7 +945,7 @@ def dummy_opt_path():
"*.msgpack"
"*.msgpack"
])
])
assert
os
.
path
.
exists
(
json_path
)
assert
os
.
path
.
exists
(
json_path
)
with
open
(
json_path
,
"r"
)
as
f
:
with
open
(
json_path
)
as
f
:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
config
[
"architectures"
]
=
[
"MyOPTForCausalLM"
]
config
[
"architectures"
]
=
[
"MyOPTForCausalLM"
]
with
open
(
json_path
,
"w"
)
as
f
:
with
open
(
json_path
,
"w"
)
as
f
:
...
@@ -977,7 +964,7 @@ def dummy_llava_path():
...
@@ -977,7 +964,7 @@ def dummy_llava_path():
"*.msgpack"
"*.msgpack"
])
])
assert
os
.
path
.
exists
(
json_path
)
assert
os
.
path
.
exists
(
json_path
)
with
open
(
json_path
,
"r"
)
as
f
:
with
open
(
json_path
)
as
f
:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
config
[
"architectures"
]
=
[
"MyLlava"
]
config
[
"architectures"
]
=
[
"MyLlava"
]
with
open
(
json_path
,
"w"
)
as
f
:
with
open
(
json_path
,
"w"
)
as
f
:
...
@@ -996,7 +983,7 @@ def dummy_gemma2_embedding_path():
...
@@ -996,7 +983,7 @@ def dummy_gemma2_embedding_path():
"*.msgpack"
"*.msgpack"
])
])
assert
os
.
path
.
exists
(
json_path
)
assert
os
.
path
.
exists
(
json_path
)
with
open
(
json_path
,
"r"
)
as
f
:
with
open
(
json_path
)
as
f
:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
config
[
"architectures"
]
=
[
"MyGemma2Embedding"
]
config
[
"architectures"
]
=
[
"MyGemma2Embedding"
]
with
open
(
json_path
,
"w"
)
as
f
:
with
open
(
json_path
,
"w"
)
as
f
:
...
...
tests/core/block/test_prefix_caching_block.py
View file @
21063c11
...
@@ -99,13 +99,11 @@ class TestPrefixCachingBlock:
...
@@ -99,13 +99,11 @@ class TestPrefixCachingBlock:
token_ids
=
[
random
.
randint
(
0
,
50_000
)
for
_
in
range
(
num_tokens
)]
token_ids
=
[
random
.
randint
(
0
,
50_000
)
for
_
in
range
(
num_tokens
)]
first_chain
,
second_chain
=
[
first_chain
,
second_chain
=
(
TestPrefixCachingBlock
.
create_chain
(
TestPrefixCachingBlock
.
create_chain
(
block_size
=
block_size
,
block_size
=
block_size
,
token_ids
=
token_ids
,
token_ids
=
token_ids
,
num_empty_trailing_blocks
=
num_empty_trailing_blocks
)
num_empty_trailing_blocks
=
num_empty_trailing_blocks
)
for
_
in
range
(
2
)
for
_
in
range
(
2
))
]
for
first_chain_block
,
second_chain_block
in
zip
(
for
first_chain_block
,
second_chain_block
in
zip
(
first_chain
,
second_chain
):
first_chain
,
second_chain
):
...
...
tests/kernels/test_mamba_ssm.py
View file @
21063c11
...
@@ -510,7 +510,7 @@ def test_selective_scan_varlen(with_padding, is_variable_B, is_variable_C,
...
@@ -510,7 +510,7 @@ def test_selective_scan_varlen(with_padding, is_variable_B, is_variable_C,
for
var
in
(
u_ref
,
delta_ref
,
B_ref
,
C_ref
,
z_ref
)
for
var
in
(
u_ref
,
delta_ref
,
B_ref
,
C_ref
,
z_ref
)
]
]
for
i
in
range
(
len
(
seqlens
[
0
])):
for
i
in
range
(
len
(
seqlens
[
0
])):
u_s
,
delta_s
,
B_s
,
C_s
,
z_s
=
[
v
[
i
].
unsqueeze
(
0
)
for
v
in
splits
]
u_s
,
delta_s
,
B_s
,
C_s
,
z_s
=
(
v
[
i
].
unsqueeze
(
0
)
for
v
in
splits
)
if
padded_state_indices
[
i
]
==
PAD_SLOT_ID
:
if
padded_state_indices
[
i
]
==
PAD_SLOT_ID
:
continue
continue
out_ref_s
,
_
=
selective_scan_ref
(
out_ref_s
,
_
=
selective_scan_ref
(
...
...
tests/models/decoder_only/vision_language/mm_processor_kwargs/test_qwen.py
View file @
21063c11
...
@@ -104,7 +104,7 @@ def test_input_mapper_valid_mm_data(input_mapper_for_qwen,
...
@@ -104,7 +104,7 @@ def test_input_mapper_valid_mm_data(input_mapper_for_qwen,
# Sad path tests for the multimodal input processor and mapper, respectively
# Sad path tests for the multimodal input processor and mapper, respectively
@
pytest
.
mark
.
parametrize
(
"mm_data"
,
[
@
pytest
.
mark
.
parametrize
(
"mm_data"
,
[
{
{
"image"
:
torch
.
rand
(
(
5
)
)
"image"
:
torch
.
rand
(
5
)
},
},
{
{
"image"
:
torch
.
rand
((
5
,
5
,
5
,
5
,
5
))
"image"
:
torch
.
rand
((
5
,
5
,
5
,
5
,
5
))
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment