Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ox696c
ktransformers
Commits
05f6cede
Unverified
Commit
05f6cede
authored
Mar 20, 2025
by
Jiaqi Liao
Committed by
GitHub
Mar 20, 2025
Browse files
Merge pull request #943 from SkqLiao/main
fix benchmark params for human eval benchmark
parents
ddd35d5b
6d4626a5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
126 additions
and
30 deletions
+126
-30
ktransformers/tests/score.py
ktransformers/tests/score.py
+126
-30
No files found.
ktransformers/tests/score.py
View file @
05f6cede
import
subprocess
import
time
import
requests
import
sys
import
os
def wait_for_server(
    base_url: str,
    timeout: int = None,
    *,
    poll_interval: float = 1.0,
    request_timeout: float = 5.0,
) -> None:
    """Block until the server at *base_url* answers ``GET /v1/models`` with 200.

    Args:
        base_url: Scheme, host and port of the server, without a trailing
            slash (the path ``/v1/models`` is appended as-is).
        timeout: Overall number of seconds to keep polling. ``None`` (or any
            other falsy value, e.g. ``0``) means poll forever.
        poll_interval: Seconds to sleep between unsuccessful attempts.
        request_timeout: Seconds a single HTTP request may take before it is
            abandoned and counted as an unsuccessful attempt.

    Raises:
        TimeoutError: If *timeout* is set and elapses before the server
            returns a 200 response.
    """
    # monotonic() is immune to wall-clock adjustments while we wait.
    start_time = time.monotonic()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                # Without this a hung connection could block past the deadline.
                timeout=request_timeout,
            )
            if response.status_code == 200:
                print("Server is ready.")
                return
        except requests.exceptions.RequestException:
            # Connection refused / reset / timed out: the server is simply not
            # up yet, so fall through to the shared back-off below.
            pass

        # Reached after every unsuccessful attempt (request error or non-200
        # response), so the loop never spins without sleeping and the deadline
        # is always enforced.
        if timeout and time.monotonic() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")
        time.sleep(poll_interval)
server_cmd
=
[
"numactl"
,
"-N"
,
"1"
,
"-m"
,
"1"
,
"/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers"
,
"--model_path"
,
"/home/qujing3/models/DeepSeek-R1-Q4_K_M/config"
,
"--gguf_path"
,
"/home/qujing3/models/DeepSeek-
R1
-Q4_K_M
/
"
,
"--gguf_path"
,
"/home/qujing3/models/DeepSeek-
V3-GGUF/DeepSeek-V3
-Q4_K_M"
,
"--port"
,
"10002"
,
"--cpu-infer"
,
"48"
"--cpu_infer"
,
"48"
,
"--optimize_config_path"
,
"ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml"
,
"--max_new_tokens"
,
"3000"
,
"--cache_lens"
,
"6000"
]
print
(
"Starting ktransformers server..."
)
server_process
=
subprocess
.
Popen
(
server_cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
)
print
(
" "
.
join
(
server_cmd
))
with
open
(
"/tmp/server_log.txt"
,
"w"
)
as
f
:
server_process
=
subprocess
.
Popen
(
server_cmd
,
stdout
=
f
,
stderr
=
f
,
text
=
True
)
while
True
:
output
=
server_process
.
stdout
.
readline
()
if
not
output
:
break
print
(
output
.
strip
())
if
"Uvicorn running on http://0.0.0.0:10002"
in
output
:
print
(
"Server started successfully!"
)
break
try
:
wait_for_server
(
"http://localhost:10002"
,
timeout
=
600
)
eval_cmd
=
[
"python"
,
"ktransformers/tests/humaneval/eval_api.py"
]
print
(
"Running eval_api.py..."
)
print
(
f
"Command:
{
' '
.
join
(
eval_cmd
)
}
"
)
env
=
os
.
environ
.
copy
()
env
[
"PYTHONUNBUFFERED"
]
=
"1"
eval_process
=
subprocess
.
Popen
(
eval_cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
,
bufsize
=
1
,
env
=
env
,
universal_newlines
=
True
)
import
threading
import
queue
def
enqueue_output
(
out
,
queue
):
for
line
in
iter
(
out
.
readline
,
''
):
queue
.
put
(
line
)
out
.
close
()
stdout_queue
=
queue
.
Queue
()
stderr_queue
=
queue
.
Queue
()
eval_cmd
=
[
"python"
,
"ktransformers/tests/humaneval/eval_api.py"
]
print
(
"Running eval_api.py..."
)
eval_process
=
subprocess
.
run
(
eval_cmd
,
capture_output
=
True
,
text
=
True
)
stdout_thread
=
threading
.
Thread
(
target
=
enqueue_output
,
args
=
(
eval_process
.
stdout
,
stdout_queue
))
stderr_thread
=
threading
.
Thread
(
target
=
enqueue_output
,
args
=
(
eval_process
.
stderr
,
stderr_queue
))
print
(
"Stopping ktransformers server..."
)
server_process
.
terminate
()
server_process
.
wait
()
stdout_thread
.
daemon
=
True
stderr_thread
.
daemon
=
True
stdout_thread
.
start
()
stderr_thread
.
start
()
evaluate_cmd
=
[
while
eval_process
.
poll
()
is
None
:
try
:
line
=
stdout_queue
.
get_nowait
()
print
(
line
,
end
=
''
,
flush
=
True
)
except
queue
.
Empty
:
pass
try
:
line
=
stderr_queue
.
get_nowait
()
print
(
line
,
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
except
queue
.
Empty
:
pass
time
.
sleep
(
1
)
while
not
stdout_queue
.
empty
():
print
(
stdout_queue
.
get
(),
end
=
''
,
flush
=
True
)
while
not
stderr_queue
.
empty
():
print
(
stderr_queue
.
get
(),
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
eval_process
.
wait
()
print
(
f
"eval_api.py completed with exit code:
{
eval_process
.
returncode
}
"
)
evaluate_cmd
=
[
"evaluate_functional_correctness"
,
"ktransformers/tests/humaneval/results/api/eval_b.jsonl"
]
print
(
"Running evaluate_functional_correctness..."
)
evaluate_process
=
subprocess
.
run
(
evaluate_cmd
,
capture_output
=
True
,
text
=
True
)
]
print
(
"Running evaluate_functional_correctness..."
)
print
(
f
"Command:
{
' '
.
join
(
evaluate_cmd
)
}
"
)
evaluate_process
=
subprocess
.
Popen
(
evaluate_cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
,
bufsize
=
1
,
universal_newlines
=
True
)
for
line
in
evaluate_process
.
stdout
:
print
(
line
,
end
=
''
,
flush
=
True
)
for
line
in
evaluate_process
.
stderr
:
print
(
line
,
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
evaluate_process
.
wait
()
print
(
f
"evaluate_functional_correctness completed with exit code:
{
evaluate_process
.
returncode
}
"
)
if
evaluate_process
.
returncode
!=
0
:
print
(
f
"evaluate_functional_correctness exited with code
{
evaluate_process
.
returncode
}
"
)
sys
.
exit
(
evaluate_process
.
returncode
)
print
(
"Evaluation Output:"
)
print
(
evaluate_process
.
stdout
)
print
(
evaluate_process
.
stderr
)
finally
:
print
(
"Stopping ktransformers server..."
)
server_process
.
terminate
()
try
:
server_process
.
wait
(
timeout
=
30
)
except
subprocess
.
TimeoutExpired
:
print
(
"Server did not terminate gracefully, forcing..."
)
server_process
.
kill
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment