Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ox696c
ktransformers
Commits
bc369b25
Commit
bc369b25
authored
Mar 19, 2025
by
SkqLiao
Browse files
add CI/CD for human eval score benchmarking
parent
c66ca657
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
159 additions
and
0 deletions
+159
-0
.github/workflows/score.yml
.github/workflows/score.yml
+24
-0
ktransformers/tests/score.py
ktransformers/tests/score.py
+135
-0
No files found.
.github/workflows/score.yml
0 → 100644
View file @
bc369b25
# Manually triggered workflow that runs the HumanEval scoring script on a
# self-hosted runner.
name: Human Eval Score KTransformers
run-name: Human Eval Score KTransformers
on: workflow_dispatch
jobs:
  Human-Eval-Score-KTransformers:
    runs-on: self-hosted
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
      - name: Check out repository code
        uses: actions/checkout@v4
      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
      - name: Human Eval Run
        run: |
          set -e
          source /home/qujing3/anaconda3/etc/profile.d/conda.sh
          conda activate ktransformers-dev
          export PATH=/usr/local/cuda-12.4/bin:$PATH
          export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
          export CUDA_HOME=/usr/local/cuda-12.4
          cd ${{ github.workspace }}
          # The scoring script lives under the ktransformers package directory.
          python ktransformers/tests/score.py
      - run: echo "This job's status is ${{ job.status }}."
ktransformers/tests/score.py
0 → 100644
View file @
bc369b25
import
subprocess
import
time
import
requests
import
sys
import
os
def wait_for_server(base_url: str, timeout: int = None) -> None:
    """Block until ``GET {base_url}/v1/models`` answers with HTTP 200.

    The endpoint is polled roughly once per second. Connection errors and
    non-200 responses are both treated as "not ready yet".

    Args:
        base_url: Scheme, host and port of the server, without a trailing
            path (for example ``"http://localhost:10002"``).
        timeout: Maximum number of seconds to wait overall. ``None`` (or 0)
            waits indefinitely.

    Raises:
        TimeoutError: If *timeout* is set and the server has not answered
            with 200 within that many seconds.
    """
    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                # Bound each probe so one hung connection cannot outlive
                # the overall deadline.
                timeout=10,
            )
            if response.status_code == 200:
                # NOTE(review): presumably a grace period so the server can
                # finish warming up after it first answers -- confirm.
                time.sleep(5)
                print("Server is ready.")
                break
        except requests.exceptions.RequestException:
            # Server not accepting connections yet (or the probe timed out).
            pass

        # Check the deadline on every iteration, including after a non-200
        # response, then back off so the loop never spins without sleeping.
        if timeout and time.time() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")
        time.sleep(1)
# Command line for the ktransformers server: numactl runs it with the
# "-N 1 -m 1" options, and the server itself is given the model and GGUF
# paths, the port to listen on and the --cpu_infer value.
server_cmd = [
    "numactl",
    "-N", "1",
    "-m", "1",
    "/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers",
    "--model_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/config",
    "--gguf_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/",
    "--port", "10002",
    "--cpu_infer", "64",
]

print("Starting ktransformers server...")
print(" ".join(server_cmd))

# Both output streams of the server go to one log file. The handle can be
# closed in this process right after the spawn because the child keeps its
# own inherited copy of the descriptor.
with open("/tmp/server_log.txt", "w") as log_file:
    server_process = subprocess.Popen(
        server_cmd,
        stdout=log_file,
        stderr=log_file,
        text=True,
    )
# Run the HumanEval pipeline against the already-started server:
#   1. wait for the server to accept requests,
#   2. run eval_api.py to generate completions,
#   3. run evaluate_functional_correctness on the generated results,
# and always stop the server afterwards, whatever happened.
try:
    wait_for_server("http://localhost:10002", timeout=300)

    import threading
    import queue

    def enqueue_output(out, queue):
        """Copy every line read from *out* into *queue*, then close *out*."""
        for line in iter(out.readline, ''):
            queue.put(line)
        out.close()

    def _drain(line_queue, target):
        """Print every line currently buffered in *line_queue* to *target*."""
        while True:
            try:
                line = line_queue.get_nowait()
            except queue.Empty:
                return
            print(line, end='', file=target, flush=True)

    def _run_streaming(cmd, env=None):
        """Run *cmd* to completion while relaying its stdout/stderr live.

        Both pipes are read concurrently by background threads so that a
        child filling one pipe can never block while we wait on the other.
        Returns the finished ``Popen`` object (``returncode`` is set).
        """
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
            env=env,
        )

        stdout_queue = queue.Queue()
        stderr_queue = queue.Queue()
        readers = [
            threading.Thread(
                target=enqueue_output,
                args=(process.stdout, stdout_queue),
                daemon=True,
            ),
            threading.Thread(
                target=enqueue_output,
                args=(process.stderr, stderr_queue),
                daemon=True,
            ),
        ]
        for reader in readers:
            reader.start()

        # Relay everything that has arrived since the last poll, not just
        # one line per stream, so output keeps pace with the child.
        while process.poll() is None:
            _drain(stdout_queue, sys.stdout)
            _drain(stderr_queue, sys.stderr)
            time.sleep(1)

        # Let the readers reach EOF before the final drain; otherwise the
        # last lines could still be in flight and get lost. The timeout
        # guards against a pipe kept open by a lingering grandchild.
        for reader in readers:
            reader.join(timeout=5)
        _drain(stdout_queue, sys.stdout)
        _drain(stderr_queue, sys.stderr)

        process.wait()
        return process

    eval_cmd = ["python", "ktransformers/tests/humaneval/eval_api.py"]
    print("Running eval_api.py...")
    print(f"Command: {' '.join(eval_cmd)}")

    # Ask the child interpreter not to buffer its output so progress shows
    # up in the CI log as it happens.
    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"

    eval_process = _run_streaming(eval_cmd, env=env)
    # NOTE(review): a non-zero exit code here is only reported, not treated
    # as fatal -- confirm that continuing to the scoring step is intended.
    print(f"eval_api.py completed with exit code: {eval_process.returncode}")

    evaluate_cmd = [
        "evaluate_functional_correctness",
        "ktransformers/tests/humaneval/results/api/eval_b.jsonl",
    ]
    print("Running evaluate_functional_correctness...")
    print(f"Command: {' '.join(evaluate_cmd)}")

    evaluate_process = _run_streaming(evaluate_cmd)
    print(f"evaluate_functional_correctness completed with exit code: {evaluate_process.returncode}")

    if evaluate_process.returncode != 0:
        print(f"evaluate_functional_correctness exited with code {evaluate_process.returncode}")
        # Propagate the failure so the CI job is marked as failed.
        sys.exit(evaluate_process.returncode)

finally:
    print("Stopping ktransformers server...")
    server_process.terminate()
    try:
        server_process.wait(timeout=30)
    except subprocess.TimeoutExpired:
        print("Server did not terminate gracefully, forcing...")
        server_process.kill()
        # Reap the killed process so it does not linger as a zombie.
        server_process.wait()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment