OpenDAS / ktransformers
Commit 07a05550, authored Feb 18, 2025 by liam
Parent: f029588b

⚡ fix device and add test

Showing 4 changed files with 207 additions and 1 deletion (+207 −1)
.gitignore  (+4 −0)
Makefile  (+5 −1)
ktransformers/server/backend/interfaces/ktransformers.py  (+3 −0)
ktransformers/tests/mmlu_test.py  (+195 −0)
.gitignore
@@ -23,3 +23,7 @@ tmp1.txt
 test_65_300_1536.txt
 test.txt
 book
+ktransformers/tests/mmlu_result_silicon.json
+ktransformers/tests/chat_txt.txt
+mmlu_result_q4km.json
+mmlu_result_q4km.log
Makefile
@@ -18,4 +18,8 @@ dev_install:
 	echo "Installing ktransformers"
 	KTRANSFORMERS_FORCE_BUILD=TRUE pip install -e . -v --no-build-isolation
-	echo "Installation completed successfully"
\ No newline at end of file
+	echo "Installation completed successfully"
+
+install_numa:
+	USE_NUMA=1 make dev_install
+
+install_no_numa:
+	env -u USE_NUMA make dev_install
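In practice the two new targets simply wrap the existing dev_install rule: make install_numa builds with USE_NUMA=1 set, while make install_no_numa builds with USE_NUMA removed from the environment via env -u USE_NUMA.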
ktransformers/server/backend/interfaces/ktransformers.py
@@ -78,6 +78,7 @@ class KTransformersInterface(TransformersInterface):
         torch_device = get_device("blk.0.self_attn", device_map)
         torch_device = "cuda:0" if torch_device == "cuda" else torch_device
         global warm_uped
+        torch.cuda.set_device(torch_device)
         if self.args.use_cuda_graph and warm_uped == True:
             if not hasattr(self, "cuda_graph_runner"):
@@ -130,6 +131,7 @@ class KTransformersInterface(TransformersInterface):
         logger.debug(f"input_ids: {input_ids.shape}")
         device = self.device_map.get("blk.0.self_attn", {}).get("generate_device", "cuda:0")
+        device = "cuda:0" if device == "cuda" else device
         if is_new:
             self.cache.reset()
@@ -161,6 +163,7 @@ class KTransformersInterface(TransformersInterface):
         if not (type(self) is TransformersInterface):
             input_ids = input_ids.to("cpu")
         inputs_embeds = self.model.model.embed_tokens(input_ids).to(device)
+        torch.cuda.set_device(device)
         if self.use_static_cache:
             logits = self.model(
                 inputs_embeds=inputs_embeds,
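All three added lines apply the same device fix: a bare "cuda" string is normalized to an explicit "cuda:0" and then selected as the current CUDA device before embeddings are built or the CUDA graph path runs. A minimal, self-contained sketch of that pattern follows; the normalize_device helper name is illustrative and not part of the repository:

import torch

def normalize_device(device: str) -> str:
    # A bare "cuda" is ambiguous on multi-GPU hosts; pin it to an explicit index.
    return "cuda:0" if device == "cuda" else device

device = normalize_device("cuda")  # -> "cuda:0"
if torch.cuda.is_available():
    # Make the normalized device current so later allocations and kernels
    # (e.g. embedding lookup, CUDA graph capture) land on the intended GPU.
    torch.cuda.set_device(torch.device(device))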
ktransformers/tests/mmlu_test.py (new file, mode 100644)
import argparse
import random
import time
import json
import requests
import pandas as pd
from datasets import load_dataset
import os

os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
os.environ['https_proxy'] = ''
os.environ['http_proxy'] = ''

hint = 'There is a single choice question. Answer the question by replying A, B, C, D. No other answers are accepted. Just the letter.'


class DataEvaluator:
    def __init__(self):
        self.data = []

    def load_data(self, file_path):
        """
        Load the MMLU data into a list of records.
        `file_path` is passed to datasets.load_dataset, so it can be a Hugging
        Face dataset name (e.g. "cais/mmlu") or a local dataset path.
        """
        ds = load_dataset(file_path, "all")
        df = pd.DataFrame(ds['test'])
        for _, row in df.iterrows():
            self.data.append(row.to_dict())

    def get_prompt(self, record):
        """
        Combine fields from a record with the hint to create a full prompt.
        :param record: Dictionary containing the question, choices, and answer.
        :return: A formatted prompt string.
        """
        # Build the "A. ...", "B. ..." option lines from the record's choices.
        options_str = "\n".join(
            [f"{chr(65 + i)}. {opt}" for i, opt in enumerate(record['choices'])]
        )
        prompt = hint + "\nQuestion: " + record['question'] + "\n" + options_str + "\nAnswer: '"
        return prompt

    def post_processing(self, text):
        """
        Post-process the prediction string: keep only the first character of
        the first non-empty line (the expected answer letter).
        :param text: The raw prediction string.
        :return: Processed prediction string.
        """
        text = text.lstrip('\n').split('\n')[0]
        return text[:1]

    def score(self, pred, answers):
        """
        Score a prediction by exact match against the reference answer(s).
        :param pred: The predicted answer letter.
        :param answers: The reference answer letter(s).
        :return: 1 if the prediction matches a reference answer, else 0.
        """
        for answer in answers:
            if pred == answer:
                return 1
        return 0
# Function to generate text using the API
def generate_text(api_url, question, model_name, stream=False):
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
        # Put the API key here if the endpoint requires one.
        'Authorization': 'Bearer '
    }
    data = {
        "messages": [{"content": question, "role": "user"}],
        "model": model_name,
        "stream": stream,
        "temperature": 0.6
    }
    print("POST data:", data)
    response = requests.post(api_url, headers=headers, json=data)
    if response.status_code == 200:
        result = response.json()
        return result.get('choices', [{}])[0].get('message', {}).get('content', '').strip()
    else:
        print(f"API Request failed with status code {response.status_code}")
        return None
# Main function to handle multiple evaluations
def main(concurrent_requests, data_evaluator: DataEvaluator, result_file, log_file, api_url, model_name):
    start_total_time = time.time()
    total_score = 0
    results = []
    # Fix the random seed so the sampled questions are reproducible.
    random.seed(42)
    random.shuffle(data_evaluator.data)
    for i in range(min(concurrent_requests, len(data_evaluator.data))):
        # Take the i-th item of the shuffled data for this request.
        data_item = data_evaluator.data[i]
        question = data_evaluator.get_prompt(data_item)
        # Start the timer for this evaluation.
        start_time = time.time()
        try:
            # Generate a prediction using the API.
            prediction = generate_text(api_url, question, model_name)
            if prediction is None:
                raise Exception(f"Failed to get prediction for {question}")
            answer = chr(data_item['answer'] + 65)
            # Compute the exact-match score.
            score = data_evaluator.score(data_evaluator.post_processing(prediction), answer)
            # Calculate the time taken.
            elapsed_time = time.time() - start_time
            # Collect the result data.
            result_data = {
                "question_id": i,
                "answer": answer,
                "prediction": data_evaluator.post_processing(prediction),
                "score": score,
                "time": elapsed_time
            }
            # Append the result to the result file, one JSON object per entry.
            with open(result_file, 'a', encoding='utf-8') as f:
                json.dump(result_data, f, ensure_ascii=False, indent=4)
                f.write("\n")
            results.append(result_data)
            # Aggregate scores.
            total_score += score
        except Exception as e:
            print(f"Error processing request {i}: {e}")

    # Calculate total time and throughput.
    total_time = time.time() - start_total_time
    throughput = concurrent_requests / total_time
    # Log the total time, throughput, and average score.
    with open(log_file, 'a', encoding='utf-8') as log_f:
        log_f.write(f"Total Time: {total_time:.2f} seconds\n")
        log_f.write(f"Throughput: {throughput:.2f} requests per second\n")
        log_f.write(f"Average Scores: {total_score / concurrent_requests}\n")
        log_f.write('-' * 40 + '\n')
    print(f"Results saved to {result_file}")
    print(f"Log saved to {log_file}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="API Generate Tester")
    parser.add_argument("--concurrent", type=int, default=1000, help="Number of evaluations to run")
    parser.add_argument("--file", type=str, default="cais/mmlu", help="MMLU dataset name or path (passed to datasets.load_dataset)")
    parser.add_argument("--result", type=str, default="./mmlu_result_silicon.json", help="Path to save the result JSON file")
    parser.add_argument("--log", type=str, default="./mmlu_result_silicon.log", help="Path to save the log file")
    parser.add_argument("--model", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model name or path")
    parser.add_argument("--api_url", type=str, default="http://localhost:10003/v1/chat/completions", help="API URL")
    # parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
    args = parser.parse_args()

    # Load the data from the provided file.
    data_evaluator = DataEvaluator()
    data_evaluator.load_data(args.file)

    # Run the main function with the specified number of evaluations.
    main(args.concurrent, data_evaluator, args.result, args.log, args.api_url, args.model)
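The test can then be pointed at a running server, for example (illustrative flag values; any omitted flag falls back to the defaults in the argparse definitions above):

python ktransformers/tests/mmlu_test.py --concurrent 100 --model Pro/deepseek-ai/DeepSeek-V3 --api_url http://localhost:10003/v1/chat/completions --result ./mmlu_result_silicon.json --log ./mmlu_result_silicon.log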