Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
97d31082
Commit
97d31082
authored
Jun 27, 2023
by
Matt Hoffner
Browse files
updates from feedback
parent
896bd5f9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
44 additions
and
26 deletions
+44
-26
lm_eval/models/ggml.py
lm_eval/models/ggml.py
+37
-15
tests/test_ggml.py
tests/test_ggml.py
+7
-11
tests/testdata/ggml_test_01d366e32dd8ae86bd079b6822814dcafad69a9082e4cf4db9633eaad47933c2.pkl
...8ae86bd079b6822814dcafad69a9082e4cf4db9633eaad47933c2.pkl
+0
-0
tests/testdata/ggml_test_04e9938f35d50bceb56453089ce5c7a0738ac878d40ded36f8a1fc170ab54b18.pkl
...50bceb56453089ce5c7a0738ac878d40ded36f8a1fc170ab54b18.pkl
+0
-0
tests/testdata/ggml_test_941e4a484a2f5d4d99b45084003946423f63cc2955e9400f7153a51cbed9470a.pkl
...f5d4d99b45084003946423f63cc2955e9400f7153a51cbed9470a.pkl
+0
-0
tests/testdata/ggml_test_e7804132de7c4a26d33e65d1853c1a97d3b4b364da6bf7fcee2883953b61e6c6.pkl
...c4a26d33e65d1853c1a97d3b4b364da6bf7fcee2883953b61e6c6.pkl
+0
-0
No files found.
lm_eval/models/ggml.py
View file @
97d31082
import
requests
import
requests
import
logging
import
logging
import
time
from
lm_eval.utils
import
Reorderer
from
lm_eval.base
import
BaseLM
from
lm_eval.base
import
BaseLM
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
requests.exceptions
import
RequestException
from
requests.exceptions
import
RequestException
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
def
ggml_completion
(
base_url
,
**
kwargs
):
def
ggml_completion
(
base_url
,
retries
=
3
,
delay
=
5
,
**
kwargs
):
try
:
for
_
in
range
(
retries
):
response
=
requests
.
post
(
f
"
{
base_url
}
/v1/completions"
,
json
=
kwargs
)
try
:
response
.
raise_for_status
()
response
=
requests
.
post
(
f
"
{
base_url
}
/v1/completions"
,
json
=
kwargs
)
return
response
.
json
()
response
.
raise_for_status
()
except
RequestException
as
e
:
return
response
.
json
()
print
(
f
"RequestException:
{
e
}
"
)
except
RequestException
as
e
:
return
None
logger
.
error
(
f
"RequestException:
{
e
}
"
)
time
.
sleep
(
delay
)
# wait before retrying
else
:
raise
Exception
(
f
"Failed to get a valid response after
{
retries
}
retries. Last exception:
{
e
}
"
)
class
GGMLLM
(
BaseLM
):
class
GGMLLM
(
BaseLM
):
def
__init__
(
self
,
base_url
,
truncate
=
False
):
def
__init__
(
self
,
base_url
,
truncate
=
False
):
...
@@ -22,37 +27,54 @@ class GGMLLM(BaseLM):
...
@@ -22,37 +27,54 @@ class GGMLLM(BaseLM):
self
.
truncate
=
truncate
self
.
truncate
=
truncate
def
loglikelihood
(
self
,
requests
):
def
loglikelihood
(
self
,
requests
):
reorderer
=
Reorderer
(
requests
,
len
)
requests
=
reorderer
.
get_reordered
()
res
=
[]
res
=
[]
for
context
,
continuation
in
tqdm
(
requests
):
for
context
,
continuation
in
tqdm
(
requests
):
response
=
ggml_completion
(
self
.
base_url
,
context
=
context
,
continuation
=
continuation
)
response
=
ggml_completion
(
self
.
base_url
,
context
=
context
,
continuation
=
continuation
)
if
response
and
"choices"
in
response
and
response
[
"choices"
]:
if
response
and
"choices"
in
response
and
response
[
"choices"
]:
choice
=
response
[
"choices"
][
0
]
choice
=
response
[
"choices"
][
0
]
logprobs
=
choice
.
get
(
"logprobs"
)
logprobs
=
choice
.
get
(
"logprobs"
)
logprob
=
logprobs
[
"token_logprobs"
][
0
]
if
logprobs
and
logprobs
[
"token_logprobs"
]
else
-
1.2345
try
:
logprob
=
logprobs
[
"token_logprobs"
][
0
]
except
TypeError
:
raise
ValueError
(
"Invalid logprobs data. Expected 'logprobs' to contain 'token_logprobs' list."
)
is_greedy
=
choice
[
"finish_reason"
]
==
"length"
is_greedy
=
choice
[
"finish_reason"
]
==
"length"
res
.
append
((
logprob
,
is_greedy
))
res
.
append
((
logprob
,
is_greedy
))
else
:
else
:
logger
.
error
(
f
"Invalid response for loglikelihood. Response:
{
response
}
"
)
logger
.
error
(
f
"Invalid response for loglikelihood. Response:
{
response
}
"
)
assert
False
assert
False
return
re
s
return
re
orderer
.
get_original
(
res
)
def
greedy_until
(
self
,
requests
):
def
greedy_until
(
self
,
requests
):
if
not
requests
:
if
not
requests
:
return
[]
return
[]
reorderer
=
Reorderer
(
requests
,
len
)
requests
=
reorderer
.
get_reordered
()
res
=
[]
res
=
[]
for
request
in
tqdm
(
requests
):
for
request
in
tqdm
(
requests
):
inp
=
request
[
0
]
inp
=
request
[
0
]
request_args
=
request
[
1
]
request_args
=
request
[
1
]
until
=
request_args
[
"until"
]
until
=
request_args
[
"until"
]
response
=
ggml_completion
(
self
.
base_url
,
context
=
inp
,
stop
=
until
)
response
=
ggml_completion
(
self
.
base_url
,
context
=
inp
,
stop
=
until
)
if
response
and
"text"
in
response
:
print
(
response
);
generated_text
=
response
[
"text"
].
strip
()
if
response
and
"choices"
in
response
and
response
[
"choices"
]:
res
.
append
(
generated_text
)
choice
=
response
[
"choices"
][
0
]
if
"text"
in
choice
:
generated_text
=
choice
[
"text"
].
strip
()
res
.
append
(
generated_text
)
else
:
logger
.
error
(
f
"Invalid response for greedy_until. Response:
{
response
}
"
)
res
.
append
(
None
)
# Add default value in case of error
else
:
else
:
logger
.
error
(
f
"Invalid response for greedy_until. Response:
{
response
}
"
)
logger
.
error
(
f
"Invalid response for greedy_until. Response:
{
response
}
"
)
continue
res
.
append
(
None
)
# Add default value in case of error
return
res
return
reorderer
.
get_original
(
res
)
def
_model_call
(
self
,
inps
):
def
_model_call
(
self
,
inps
):
# Placeholder implementation
# Placeholder implementation
...
...
tests/test_ggml.py
View file @
97d31082
...
@@ -21,7 +21,7 @@ def ggml_completion_mock(base_url, **kwargs):
...
@@ -21,7 +21,7 @@ def ggml_completion_mock(base_url, **kwargs):
else
:
else
:
print
(
"The file does not exist, attempting to write..."
)
print
(
"The file does not exist, attempting to write..."
)
if
'stop'
in
kwargs
:
if
'stop'
in
kwargs
:
result
=
{
"choices"
:
[{
"logprobs"
:
{
"token_logprobs"
:
[
-
1.2345
]},
"finish_reason"
:
"length"
}]}
result
=
{
"choices"
:
[{
"text"
:
f
"generated text until
{
kwargs
[
'stop'
]
}
"
,
"logprobs"
:
{
"token_logprobs"
:
[
-
1.2345
]},
"finish_reason"
:
"length"
}]}
else
:
else
:
result
=
{
"choices"
:
[{
"logprobs"
:
{
"token_logprobs"
:
[
-
1.2345
]},
"finish_reason"
:
"length"
}]}
result
=
{
"choices"
:
[{
"logprobs"
:
{
"token_logprobs"
:
[
-
1.2345
]},
"finish_reason"
:
"length"
}]}
...
@@ -36,13 +36,12 @@ def ggml_completion_mock(base_url, **kwargs):
...
@@ -36,13 +36,12 @@ def ggml_completion_mock(base_url, **kwargs):
return
result
return
result
class
GGMLLMTest
(
unittest
.
TestCase
):
class
GGMLLMTest
(
unittest
.
TestCase
):
@
patch
(
'lm_eval.models.ggml.ggml_completion'
,
new
=
ggml_completion_mock
)
@
patch
(
'lm_eval.models.ggml.ggml_completion'
,
side_effect
=
ggml_completion_mock
)
def
test_loglikelihood
(
self
):
def
test_loglikelihood
(
self
,
ggml_completion_mock
):
lm
=
GGMLLM
(
base_url
)
lm
=
GGMLLM
(
base_url
)
lm
.
ggml_completion
=
ggml_completion_mock
# Test loglikelihood
# Test loglikelihood
requests
=
[(
"context1"
,
"continuation1"
),
(
"context2"
,
"continuation2"
)]
requests
=
[(
"context1"
,
"continuation1"
),
(
"context2"
,
"continuation2"
)]
res
=
lm
.
loglikelihood
(
requests
)
res
=
lm
.
loglikelihood
(
requests
)
...
@@ -51,19 +50,16 @@ class GGMLLMTest(unittest.TestCase):
...
@@ -51,19 +50,16 @@ class GGMLLMTest(unittest.TestCase):
expected_res
=
[(
logprob
,
True
)
for
logprob
in
[
-
1.2345
,
-
1.2345
]]
expected_res
=
[(
logprob
,
True
)
for
logprob
in
[
-
1.2345
,
-
1.2345
]]
self
.
assertEqual
(
res
,
expected_res
)
self
.
assertEqual
(
res
,
expected_res
)
@
patch
(
'lm_eval.models.ggml.ggml_completion'
,
new
=
ggml_completion_mock
)
@
patch
(
'lm_eval.models.ggml.ggml_completion'
,
side_effect
=
ggml_completion_mock
)
def
test_greedy_until
(
self
):
def
test_greedy_until
(
self
,
ggml_completion_mock
):
lm
=
GGMLLM
(
base_url
)
lm
=
GGMLLM
(
base_url
)
# Set the ggml_completion method to the defined mock
lm
.
ggml_completion
=
ggml_completion_mock
# Test greedy_until
# Test greedy_until
requests
=
[(
"input1"
,
{
"until"
:
"stop1"
}),
(
"input2"
,
{
"until"
:
"stop2"
})]
requests
=
[(
"input1"
,
{
"until"
:
"stop1"
}),
(
"input2"
,
{
"until"
:
"stop2"
})]
res
=
lm
.
greedy_until
(
requests
)
res
=
lm
.
greedy_until
(
requests
)
# Assert the greedy_until response is correct
# Assert the greedy_until response is correct
expected_res
=
[]
expected_res
=
[
"generated text until stop1"
,
"generated text until stop1"
]
self
.
assertEqual
(
res
,
expected_res
)
self
.
assertEqual
(
res
,
expected_res
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
tests/testdata/ggml_test_01d366e32dd8ae86bd079b6822814dcafad69a9082e4cf4db9633eaad47933c2.pkl
View file @
97d31082
No preview for this file type
tests/testdata/ggml_test_04e9938f35d50bceb56453089ce5c7a0738ac878d40ded36f8a1fc170ab54b18.pkl
deleted
100644 → 0
View file @
896bd5f9
File deleted
tests/testdata/ggml_test_941e4a484a2f5d4d99b45084003946423f63cc2955e9400f7153a51cbed9470a.pkl
deleted
100644 → 0
View file @
896bd5f9
File deleted
tests/testdata/ggml_test_e7804132de7c4a26d33e65d1853c1a97d3b4b364da6bf7fcee2883953b61e6c6.pkl
View file @
97d31082
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment