Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c34eeec5
Unverified
Commit
c34eeec5
authored
Mar 06, 2025
by
Brayden Zhong
Committed by
GitHub
Mar 07, 2025
Browse files
[Bugfix] Correctly call `cudaProfilerStop` in benchmarks script (#14183)
Signed-off-by:
Brayden Zhong
<
b8zhong@uwaterloo.ca
>
parent
ad60bbb2
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
5 additions
and
6 deletions
+5
-6
benchmarks/kernels/benchmark_layernorm.py
benchmarks/kernels/benchmark_layernorm.py
+1
-1
benchmarks/kernels/benchmark_lora.py
benchmarks/kernels/benchmark_lora.py
+0
-1
benchmarks/kernels/benchmark_machete.py
benchmarks/kernels/benchmark_machete.py
+1
-2
benchmarks/kernels/benchmark_moe.py
benchmarks/kernels/benchmark_moe.py
+1
-0
benchmarks/kernels/benchmark_paged_attention.py
benchmarks/kernels/benchmark_paged_attention.py
+1
-1
benchmarks/kernels/benchmark_quant.py
benchmarks/kernels/benchmark_quant.py
+1
-1
No files found.
benchmarks/kernels/benchmark_layernorm.py
View file @
c34eeec5
...
@@ -40,7 +40,7 @@ def main(num_tokens: int,
...
@@ -40,7 +40,7 @@ def main(num_tokens: int,
end_time
=
time
.
perf_counter
()
end_time
=
time
.
perf_counter
()
if
profile
:
if
profile
:
torch
.
cuda
.
cudart
().
cudaProfilerSt
art
()
torch
.
cuda
.
cudart
().
cudaProfilerSt
op
()
return
(
end_time
-
start_time
)
/
num_iters
return
(
end_time
-
start_time
)
/
num_iters
# Warmup.
# Warmup.
...
...
benchmarks/kernels/benchmark_lora.py
View file @
c34eeec5
...
@@ -153,7 +153,6 @@ def ref_group_gemm(ref_out: torch.Tensor, input: torch.Tensor,
...
@@ -153,7 +153,6 @@ def ref_group_gemm(ref_out: torch.Tensor, input: torch.Tensor,
result
=
torch
.
nn
.
functional
.
linear
(
x
,
w
)
result
=
torch
.
nn
.
functional
.
linear
(
x
,
w
)
result
*=
scaling
result
*=
scaling
out_list
.
append
(
result
)
out_list
.
append
(
result
)
torch
.
cat
(
out_list
,
dim
=
0
)
cat_result
=
torch
.
cat
(
out_list
,
dim
=
0
)
cat_result
=
torch
.
cat
(
out_list
,
dim
=
0
)
...
...
benchmarks/kernels/benchmark_machete.py
View file @
c34eeec5
...
@@ -45,7 +45,6 @@ def terse_type_name(dt):
...
@@ -45,7 +45,6 @@ def terse_type_name(dt):
torch
.
float16
:
"fp16"
,
torch
.
float16
:
"fp16"
,
torch
.
int8
:
"int8"
,
torch
.
int8
:
"int8"
,
torch
.
float8_e4m3fn
:
"fp8"
,
torch
.
float8_e4m3fn
:
"fp8"
,
torch
.
bfloat16
:
"bf16"
,
torch
.
float
:
"float"
,
torch
.
float
:
"float"
,
torch
.
int
:
"int"
,
torch
.
int
:
"int"
,
}[
dt
]
}[
dt
]
...
@@ -259,7 +258,7 @@ def machete_create_bench_fn(bt: BenchmarkTensors,
...
@@ -259,7 +258,7 @@ def machete_create_bench_fn(bt: BenchmarkTensors,
return
lambda
:
ops
.
machete_mm
(
return
lambda
:
ops
.
machete_mm
(
a
=
bt
.
a
,
a
=
bt
.
a
,
b_q
=
bt
.
w_q
,
b_q
=
w_q
,
b_type
=
bt
.
wtype
,
b_type
=
bt
.
wtype
,
b_group_scales
=
bt
.
w_g_s
,
b_group_scales
=
bt
.
w_g_s
,
b_group_zeros
=
w_g_zp
,
b_group_zeros
=
w_g_zp
,
...
...
benchmarks/kernels/benchmark_moe.py
View file @
c34eeec5
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
argparse
import
json
import
time
import
time
from
contextlib
import
nullcontext
from
contextlib
import
nullcontext
from
datetime
import
datetime
from
datetime
import
datetime
...
...
benchmarks/kernels/benchmark_paged_attention.py
View file @
c34eeec5
...
@@ -176,7 +176,7 @@ def main(
...
@@ -176,7 +176,7 @@ def main(
end_time
=
time
.
perf_counter
()
end_time
=
time
.
perf_counter
()
if
profile
:
if
profile
:
torch
.
cuda
.
cudart
().
cudaProfilerSt
art
()
torch
.
cuda
.
cudart
().
cudaProfilerSt
op
()
return
(
end_time
-
start_time
)
/
num_iters
return
(
end_time
-
start_time
)
/
num_iters
# Warmup.
# Warmup.
...
...
benchmarks/kernels/benchmark_quant.py
View file @
c34eeec5
...
@@ -40,7 +40,7 @@ def main(num_tokens: int,
...
@@ -40,7 +40,7 @@ def main(num_tokens: int,
end_time
=
time
.
perf_counter
()
end_time
=
time
.
perf_counter
()
if
profile
:
if
profile
:
torch
.
cuda
.
cudart
().
cudaProfilerSt
art
()
torch
.
cuda
.
cudart
().
cudaProfilerSt
op
()
return
(
end_time
-
start_time
)
/
num_iters
return
(
end_time
-
start_time
)
/
num_iters
# Warmup.
# Warmup.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment