Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
59053354
"lib/vscode:/vscode.git/clone" did not exist on "55c6525f5b40e9dac57a764f9f36a912f4de25cc"
Unverified
Commit
59053354
authored
Aug 20, 2025
by
Jason Zhou
Committed by
GitHub
Aug 20, 2025
Browse files
feat: add --tokenizer_path to profile_endpoint.py (#2550)
parent
344c21dc
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
53 additions
and
5 deletions
+53
-5
benchmarks/profiler/profile_endpoint.py
benchmarks/profiler/profile_endpoint.py
+11
-0
benchmarks/profiler/profile_sla.py
benchmarks/profiler/profile_sla.py
+2
-0
benchmarks/profiler/utils/genai_perf.py
benchmarks/profiler/utils/genai_perf.py
+19
-3
benchmarks/profiler/utils/profile_decode.py
benchmarks/profiler/utils/profile_decode.py
+9
-0
benchmarks/profiler/utils/profile_prefill.py
benchmarks/profiler/utils/profile_prefill.py
+12
-2
No files found.
benchmarks/profiler/profile_endpoint.py
View file @
59053354
...
...
@@ -35,6 +35,13 @@ if __name__ == "__main__":
required
=
True
,
help
=
"model name"
,
)
parser
.
add_argument
(
"--tokenizer_path"
,
type
=
str
,
required
=
False
,
default
=
""
,
help
=
"tokenizer path"
,
)
parser
.
add_argument
(
"--url"
,
type
=
str
,
...
...
@@ -75,10 +82,13 @@ if __name__ == "__main__":
args
=
parser
.
parse_args
()
os
.
makedirs
(
args
.
work_dir
,
exist_ok
=
True
)
if
args
.
tokenizer_path
==
""
:
args
.
tokenizer_path
=
args
.
model_name
if
args
.
mode
==
"prefill"
:
profile_prefill
(
args
.
work_dir
,
args
.
model_name
,
args
.
tokenizer_path
,
args
.
url
,
args
.
num_gpus
,
args
.
max_context_length
,
...
...
@@ -89,6 +99,7 @@ if __name__ == "__main__":
profile_decode
(
args
.
work_dir
,
args
.
model_name
,
args
.
tokenizer_path
,
args
.
url
,
args
.
num_gpus
,
args
.
max_kv_tokens
,
...
...
benchmarks/profiler/profile_sla.py
View file @
59053354
...
...
@@ -421,6 +421,7 @@ async def run_profile(args):
profile_prefill
(
work_dir
,
model_name
,
model_name
,
base_url
,
best_prefill_tp
,
args
.
max_context_length
,
...
...
@@ -476,6 +477,7 @@ async def run_profile(args):
profile_decode
(
work_dir
,
model_name
,
model_name
,
base_url
,
best_decode_tp
,
max_kv_tokens
,
...
...
benchmarks/profiler/utils/genai_perf.py
View file @
59053354
...
...
@@ -34,6 +34,7 @@ def _get_common_genai_perf_cmd(
artifact_dir
,
seed
=
100
,
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
tokenizer
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
base_url
=
"http://localhost:8000"
,
):
return
[
...
...
@@ -42,7 +43,7 @@ def _get_common_genai_perf_cmd(
"--model"
,
model
,
"--tokenizer"
,
model
,
tokenizer
,
"--endpoint-type"
,
"chat"
,
"--endpoint"
,
...
...
@@ -68,6 +69,7 @@ def get_prefill_genai_perf_cmd(
artifact_dir
,
seed
=
100
,
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
tokenizer
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
osl
=
5
,
base_url
=
"http://localhost:8000"
,
):
...
...
@@ -75,6 +77,7 @@ def get_prefill_genai_perf_cmd(
artifact_dir
,
seed
,
model
,
tokenizer
,
base_url
,
)
+
[
"--synthetic-input-tokens-mean"
,
...
...
@@ -103,12 +106,14 @@ def get_decode_genai_perf_cmd(
num_request
,
seed
=
100
,
model
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
tokenizer
=
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
,
base_url
=
"http://localhost:8000"
,
):
return
_get_common_genai_perf_cmd
(
artifact_dir
,
seed
,
model
,
tokenizer
,
base_url
,
)
+
[
"--synthetic-input-tokens-mean"
,
...
...
@@ -147,11 +152,19 @@ def get_gap_result(artifact_dir: str) -> dict:
def
benchmark_prefill
(
isl
,
genai_perf_artifact_dir
,
model_name
,
base_url
=
"http://localhost:8000"
isl
,
genai_perf_artifact_dir
,
model_name
,
tokenizer
,
base_url
=
"http://localhost:8000"
,
):
logger
.
info
(
f
"Running genai-perf with isl
{
isl
}
"
)
genai_perf_cmd
=
get_prefill_genai_perf_cmd
(
isl
,
genai_perf_artifact_dir
,
model
=
model_name
,
base_url
=
base_url
isl
,
genai_perf_artifact_dir
,
model
=
model_name
,
tokenizer
=
tokenizer
,
base_url
=
base_url
,
)
print
(
f
"genai-perf cmd:
{
genai_perf_cmd
}
"
)
# import pdb; pdb.set_trace()
...
...
@@ -179,6 +192,7 @@ def benchmark_decode(
num_request
,
genai_perf_artifact_dir
,
model_name
,
tokenizer
,
base_url
=
"http://localhost:8000"
,
):
logger
.
info
(
f
"Profiling decode with num_request
{
num_request
}
..."
)
...
...
@@ -194,6 +208,7 @@ def benchmark_decode(
num_request
,
seed
=
seed
,
model
=
model_name
,
tokenizer
=
tokenizer
,
base_url
=
base_url
,
)
gap_process
=
subprocess
.
Popen
(
...
...
@@ -211,6 +226,7 @@ def benchmark_decode(
num_request
,
seed
=
seed
,
model
=
model_name
,
tokenizer
=
tokenizer
,
base_url
=
base_url
,
)
gap_process
=
subprocess
.
Popen
(
...
...
benchmarks/profiler/utils/profile_decode.py
View file @
59053354
...
...
@@ -21,6 +21,7 @@ logger.addHandler(console_handler)
def
profile_decode
(
work_dir
,
model_name
,
tokenizer
,
url
,
num_gpus
,
max_kv_tokens
,
...
...
@@ -41,6 +42,13 @@ def profile_decode(
(
max_context_length
-
osl
)
//
interpolation_granularity
,
):
max_concurrency
=
max_kv_tokens
//
(
isl
+
osl
)
if
max_concurrency
//
interpolation_granularity
==
0
:
logger
.
warning
(
f
"max_concurrency
{
max_concurrency
}
is too small for"
f
" interpolation granularity
{
interpolation_granularity
}
."
f
" max_kv_tokens
{
max_kv_tokens
}
, isl
{
isl
}
, osl
{
osl
}
"
)
break
sweep_num_request
=
range
(
1
,
max_concurrency
,
...
...
@@ -54,6 +62,7 @@ def profile_decode(
num_request
,
genai_perf_artifact_dir
,
model_name
,
tokenizer
,
base_url
=
url
,
)
if
gap_result
is
not
None
:
...
...
benchmarks/profiler/utils/profile_prefill.py
View file @
59053354
...
...
@@ -19,7 +19,13 @@ logger.addHandler(console_handler)
def
profile_prefill
(
work_dir
,
model_name
,
url
,
num_gpus
,
max_context_length
,
interpolation_granularity
work_dir
,
model_name
,
tokenizer
,
url
,
num_gpus
,
max_context_length
,
interpolation_granularity
,
):
prefill_isl
=
[]
prefill_ttft
=
[]
...
...
@@ -32,7 +38,11 @@ def profile_prefill(
# run genai-perf
genai_perf_artifact_dir
=
f
"
{
work_dir
}
/gap_isl
{
isl
}
"
gap_result
=
benchmark_prefill
(
isl
,
genai_perf_artifact_dir
,
model_name
,
base_url
=
url
isl
,
genai_perf_artifact_dir
,
model_name
,
tokenizer
,
base_url
=
url
,
)
if
gap_result
is
not
None
:
ttft
=
gap_result
[
"time_to_first_token"
][
"avg"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment