Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e3b881ae
Commit
e3b881ae
authored
Jan 16, 2025
by
Baber
Browse files
add evaluator
parent
8181f43c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
12 deletions
+40
-12
lm_eval/tasks/mathvista/utils.py
lm_eval/tasks/mathvista/utils.py
+40
-12
No files found.
lm_eval/tasks/mathvista/utils.py
View file @
e3b881ae
import
re
from
typing
import
Optional
import
requests
# from api_model import make_concurrent_requests
from
Levenshtein
import
distance
# Credentials/endpoint for the OpenAI chat-completions API used by
# send_request() below.
# NOTE(review): API_KEY is a literal placeholder — it must be replaced (or
# loaded from an environment variable) before API-based answer extraction
# can actually work.
API_KEY = "your_openai_api_key"
API_URL = "https://api.openai.com/v1/chat/completions"
# required for external LM call
DEMO_PROMPT
=
"""
...
...
@@ -47,6 +53,30 @@ Extracted answer: B
"""
# Function to send a single request to the OpenAI API
def send_request(prompt: str):
    """Send ``prompt`` to the OpenAI chat-completions API and return the reply.

    Args:
        prompt: User-role message content forwarded verbatim to the API.

    Returns:
        The assistant message content (str) on success, or ``None`` if the
        request fails for any reason (network error, non-2xx status,
        unexpected response shape).
    """
    try:
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "model": "gpt-4",
            "messages": [
                {"role": "user", "content": prompt},
            ],
            "max_tokens": 1024,
        }
        # An explicit timeout is required: without one, requests.post blocks
        # indefinitely if the server stops responding, hanging the evaluation.
        response = requests.post(API_URL, headers=headers, json=data, timeout=60)
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberate best-effort: callers treat None as "extraction failed"
        # rather than aborting the whole evaluation run.
        print(f"An error occurred while requesting: {e}")
        return None
def
create_test_prompt
(
demo_prompt
,
query
,
response
):
demo_prompt
=
demo_prompt
.
strip
()
test_prompt
=
f
"
{
query
}
\n\n
{
response
}
"
...
...
@@ -152,8 +182,8 @@ def extract_answer(response: str, problem: dict, quick_extract=True) -> str:
question_type
=
problem
[
"question_type"
]
answer_type
=
problem
[
"answer_type"
]
choices
=
problem
[
"choices"
]
#
query = problem["query"]
#
pid = problem["pid"]
query
=
problem
[
"query"
]
pid
=
problem
[
"pid"
]
if
response
==
""
:
return
""
...
...
@@ -187,16 +217,14 @@ def extract_answer(response: str, problem: dict, quick_extract=True) -> str:
pass
# general extraction
# try:
# full_prompt = create_test_prompt(DEMO_PROMPT, query, response)
# extraction = make_concurrent_requests(full_prompt)
# return extraction
# except Exception:
# print(
# f"Error in extracting answer for problem: {pid} with response: {response}"
# )
# # logging.info(f"Error in extracting answer for problem: {pid} with response: {response}")
# # logging.info(e)
try
:
full_prompt
=
create_test_prompt
(
DEMO_PROMPT
,
query
,
response
)
extraction
=
send_request
(
full_prompt
)
return
extraction
except
Exception
:
print
(
f
"Error in extracting answer for problem:
{
pid
}
with response:
{
response
}
"
)
return
""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment