Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
33 additions
and
0 deletions
+33
-0
benchmarks/benchmark_latency.py
benchmarks/benchmark_latency.py
+1
-0
benchmarks/benchmark_long_document_qa_throughput.py
benchmarks/benchmark_long_document_qa_throughput.py
+1
-0
benchmarks/benchmark_prefix_caching.py
benchmarks/benchmark_prefix_caching.py
+1
-0
benchmarks/benchmark_prioritization.py
benchmarks/benchmark_prioritization.py
+1
-0
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+1
-0
benchmarks/benchmark_serving_guided.py
benchmarks/benchmark_serving_guided.py
+1
-0
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+1
-0
benchmarks/cutlass_benchmarks/sparse_benchmarks.py
benchmarks/cutlass_benchmarks/sparse_benchmarks.py
+2
-0
benchmarks/cutlass_benchmarks/utils.py
benchmarks/cutlass_benchmarks/utils.py
+2
-0
benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
+2
-0
benchmarks/cutlass_benchmarks/weight_shapes.py
benchmarks/cutlass_benchmarks/weight_shapes.py
+2
-0
benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py
benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py
+2
-0
benchmarks/disagg_benchmarks/round_robin_proxy.py
benchmarks/disagg_benchmarks/round_robin_proxy.py
+2
-0
benchmarks/disagg_benchmarks/visualize_benchmark_results.py
benchmarks/disagg_benchmarks/visualize_benchmark_results.py
+2
-0
benchmarks/fused_kernels/layernorm_rms_benchmarks.py
benchmarks/fused_kernels/layernorm_rms_benchmarks.py
+2
-0
benchmarks/kernels/benchmark_aqlm.py
benchmarks/kernels/benchmark_aqlm.py
+2
-0
benchmarks/kernels/benchmark_layernorm.py
benchmarks/kernels/benchmark_layernorm.py
+2
-0
benchmarks/kernels/benchmark_lora.py
benchmarks/kernels/benchmark_lora.py
+2
-0
benchmarks/kernels/benchmark_machete.py
benchmarks/kernels/benchmark_machete.py
+2
-0
benchmarks/kernels/benchmark_marlin.py
benchmarks/kernels/benchmark_marlin.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
benchmarks/benchmark_latency.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Benchmark the latency of processing a single batch of requests."""
import
argparse
import
dataclasses
...
...
benchmarks/benchmark_long_document_qa_throughput.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Offline benchmark to test the long document QA throughput.
...
...
benchmarks/benchmark_prefix_caching.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Benchmark the efficiency of prefix caching.
...
...
benchmarks/benchmark_prioritization.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline prioritization."""
import
argparse
import
dataclasses
...
...
benchmarks/benchmark_serving.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
r
"""Benchmark online serving throughput.
On the server side, run one of the following commands:
...
...
benchmarks/benchmark_serving_guided.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
r
"""Benchmark online serving throughput with guided decoding.
On the server side, run one of the following commands:
...
...
benchmarks/benchmark_throughput.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline inference throughput."""
import
argparse
import
dataclasses
...
...
benchmarks/cutlass_benchmarks/sparse_benchmarks.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
copy
import
itertools
...
...
benchmarks/cutlass_benchmarks/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Cutlass bench utils
from
typing
import
Iterable
,
Tuple
...
...
benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
copy
import
itertools
...
...
benchmarks/cutlass_benchmarks/weight_shapes.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Weight Shapes are in the format
# ([K, N], TP_SPLIT_DIM)
# Example:
...
...
benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
aiohttp
...
...
benchmarks/disagg_benchmarks/round_robin_proxy.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
asyncio
import
itertools
...
...
benchmarks/disagg_benchmarks/visualize_benchmark_results.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
json
import
matplotlib.pyplot
as
plt
...
...
benchmarks/fused_kernels/layernorm_rms_benchmarks.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pickle
as
pkl
import
time
from
dataclasses
import
dataclass
...
...
benchmarks/kernels/benchmark_aqlm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
sys
from
typing
import
Optional
...
...
benchmarks/kernels/benchmark_layernorm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
time
import
torch
...
...
benchmarks/kernels/benchmark_lora.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
copy
import
json
...
...
benchmarks/kernels/benchmark_machete.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
copy
import
itertools
...
...
benchmarks/kernels/benchmark_marlin.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
torch
...
...
Prev
1
2
3
4
5
6
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment