Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ktransformers
Commits
19c824f9
Commit
19c824f9
authored
Mar 20, 2025
by
SkqLiao
Browse files
Change cpu-infer to match the actual number of CPU cores on the self-hosted server.
parent
bad334fa
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
29 additions
and
123 deletions
+29
-123
ktransformers/tests/score.py
ktransformers/tests/score.py
+29
-123
No files found.
ktransformers/tests/score.py
View file @
19c824f9
import
subprocess
import
subprocess
import
time
import
time
import
requests
import
sys
import
os
def wait_for_server(base_url: str, timeout: int = None) -> None:
    """Block until the server at ``base_url`` reports that it is ready.

    Readiness is probed by issuing ``GET {base_url}/v1/models`` once per
    attempt; an HTTP 200 response means the server is up.

    Args:
        base_url: Scheme, host and port of the server, without a trailing
            slash (for example ``"http://localhost:10002"``).
        timeout: Maximum number of seconds to keep polling. ``None`` (or any
            other falsy value) means wait indefinitely.

    Raises:
        TimeoutError: If ``timeout`` is set and the server has not answered
            with HTTP 200 before it elapses.
    """
    # Monotonic clock: the elapsed-time check must not be affected by
    # wall-clock adjustments while we are waiting.
    start_time = time.monotonic()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                # Bound each probe so a connection that is accepted but never
                # answered cannot stall the loop past the overall deadline.
                timeout=5,
            )
            if response.status_code == 200:
                # Short grace period after the first successful probe before
                # callers start sending real traffic.
                time.sleep(5)
                print("Server is ready.")
                break
        except requests.exceptions.RequestException:
            # Connection refused / reset / timed out: the server is simply
            # not up yet, so fall through and retry.
            pass

        # Checked on every unsuccessful attempt, whether it failed with an
        # exception or with a non-200 status code.
        if timeout and time.monotonic() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")

        # Pause between attempts so a server that answers quickly with an
        # error status is not hammered in a tight loop.
        time.sleep(1)
server_cmd
=
[
server_cmd
=
[
"numactl"
,
"-N"
,
"1"
,
"-m"
,
"1"
,
"/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers"
,
"/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers"
,
"--model_path"
,
"/home/qujing3/models/DeepSeek-R1-Q4_K_M/config"
,
"--model_path"
,
"/home/qujing3/models/DeepSeek-R1-Q4_K_M/config"
,
"--gguf_path"
,
"/home/qujing3/models/DeepSeek-R1-Q4_K_M/"
,
"--gguf_path"
,
"/home/qujing3/models/DeepSeek-R1-Q4_K_M/"
,
"--port"
,
"10002"
,
"--port"
,
"10002"
,
"--cpu
_
infer"
,
"
6
4"
"--cpu
-
infer"
,
"4
8
"
]
]
print
(
"Starting ktransformers server..."
)
print
(
"Starting ktransformers server..."
)
print
(
" "
.
join
(
server_cmd
))
server_process
=
subprocess
.
Popen
(
server_cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
)
with
open
(
"/tmp/server_log.txt"
,
"w"
)
as
f
:
server_process
=
subprocess
.
Popen
(
server_cmd
,
stdout
=
f
,
stderr
=
f
,
text
=
True
)
while
True
:
output
=
server_process
.
stdout
.
readline
()
try
:
if
not
output
:
wait_for_server
(
"http://localhost:10002"
,
timeout
=
300
)
break
print
(
output
.
strip
())
eval_cmd
=
[
"python"
,
"ktransformers/tests/humaneval/eval_api.py"
]
if
"Uvicorn running on http://0.0.0.0:10002"
in
output
:
print
(
"Running eval_api.py..."
)
print
(
"Server started successfully!"
)
print
(
f
"Command:
{
' '
.
join
(
eval_cmd
)
}
"
)
break
env
=
os
.
environ
.
copy
()
eval_cmd
=
[
"python"
,
"ktransformers/tests/humaneval/eval_api.py"
]
env
[
"PYTHONUNBUFFERED"
]
=
"1"
print
(
"Running eval_api.py..."
)
eval_process
=
subprocess
.
run
(
eval_cmd
,
capture_output
=
True
,
text
=
True
)
eval_process
=
subprocess
.
Popen
(
eval_cmd
,
print
(
"Stopping ktransformers server..."
)
stdout
=
subprocess
.
PIPE
,
server_process
.
terminate
()
stderr
=
subprocess
.
PIPE
,
server_process
.
wait
()
text
=
True
,
bufsize
=
1
,
evaluate_cmd
=
[
env
=
env
,
"evaluate_functional_correctness"
,
universal_newlines
=
True
"ktransformers/tests/humaneval/results/api/eval_b.jsonl"
)
]
print
(
"Running evaluate_functional_correctness..."
)
import
threading
evaluate_process
=
subprocess
.
run
(
evaluate_cmd
,
capture_output
=
True
,
text
=
True
)
import
queue
def enqueue_output(out, queue):
    """Copy every line read from ``out`` into ``queue``, then close ``out``.

    Intended to be used as a thread target so that a consumer can poll the
    queue without blocking on the stream itself.

    Args:
        out: Text stream exposing ``readline()`` and ``close()``. Reading
            stops when ``readline()`` returns the empty string.
        queue: Object exposing ``put(item)``; each line is put unchanged,
            including its trailing newline. Note that this parameter name
            shadows the ``queue`` module inside the function body.
    """
    try:
        # iter(callable, sentinel) keeps calling readline() until it returns ''.
        for line in iter(out.readline, ''):
            queue.put(line)
    finally:
        # Always release the stream, even if reading or enqueuing failed,
        # so the underlying pipe is not leaked.
        out.close()
stdout_queue
=
queue
.
Queue
()
stderr_queue
=
queue
.
Queue
()
stdout_thread
=
threading
.
Thread
(
target
=
enqueue_output
,
args
=
(
eval_process
.
stdout
,
stdout_queue
))
stderr_thread
=
threading
.
Thread
(
target
=
enqueue_output
,
args
=
(
eval_process
.
stderr
,
stderr_queue
))
stdout_thread
.
daemon
=
True
stderr_thread
.
daemon
=
True
stdout_thread
.
start
()
stderr_thread
.
start
()
while
eval_process
.
poll
()
is
None
:
try
:
line
=
stdout_queue
.
get_nowait
()
print
(
line
,
end
=
''
,
flush
=
True
)
except
queue
.
Empty
:
pass
try
:
line
=
stderr_queue
.
get_nowait
()
print
(
line
,
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
except
queue
.
Empty
:
pass
time
.
sleep
(
1
)
while
not
stdout_queue
.
empty
():
print
(
stdout_queue
.
get
(),
end
=
''
,
flush
=
True
)
while
not
stderr_queue
.
empty
():
print
(
stderr_queue
.
get
(),
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
eval_process
.
wait
()
print
(
f
"eval_api.py completed with exit code:
{
eval_process
.
returncode
}
"
)
evaluate_cmd
=
[
"evaluate_functional_correctness"
,
"ktransformers/tests/humaneval/results/api/eval_b.jsonl"
]
print
(
"Running evaluate_functional_correctness..."
)
print
(
f
"Command:
{
' '
.
join
(
evaluate_cmd
)
}
"
)
evaluate_process
=
subprocess
.
Popen
(
evaluate_cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
text
=
True
,
bufsize
=
1
,
universal_newlines
=
True
)
for
line
in
evaluate_process
.
stdout
:
print
(
line
,
end
=
''
,
flush
=
True
)
for
line
in
evaluate_process
.
stderr
:
print
(
line
,
end
=
''
,
file
=
sys
.
stderr
,
flush
=
True
)
evaluate_process
.
wait
()
print
(
f
"evaluate_functional_correctness completed with exit code:
{
evaluate_process
.
returncode
}
"
)
if
evaluate_process
.
returncode
!=
0
:
print
(
f
"evaluate_functional_correctness exited with code
{
evaluate_process
.
returncode
}
"
)
sys
.
exit
(
evaluate_process
.
returncode
)
finally
:
print
(
"Evaluation Output:"
)
print
(
"Stopping ktransformers server..."
)
print
(
evaluate_process
.
stdout
)
server_process
.
terminate
()
print
(
evaluate_process
.
stderr
)
try
:
server_process
.
wait
(
timeout
=
30
)
except
subprocess
.
TimeoutExpired
:
print
(
"Server did not terminate gracefully, forcing..."
)
server_process
.
kill
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment