OpenDAS / opencompass

Unverified commit 3829be87, authored Mar 08, 2024 by Yang Yong, committed by GitHub on Mar 08, 2024

Fix LightllmApi ppl test (#951)

Parent: 107e022c
Showing 2 changed files with 27 additions and 2 deletions (+27 -2):

  opencompass/models/lightllm_api.py (+25 -0)
  opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py (+2 -2)
opencompass/models/lightllm_api.py

 import json
+import re
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional

@@ -35,6 +36,7 @@ class LightllmAPI(BaseModel):
         self.retry = retry
         self.generation_kwargs = generation_kwargs
         self.max_out_len = self.generation_kwargs.get('max_new_tokens', 1024)
+        self.meta_template = meta_template
         self.token_bucket = TokenBucket(rate_per_worker, False)

     def generate(self, inputs: List[str], max_out_len: int,

@@ -158,3 +160,26 @@ class LightllmAPI(BaseModel):
         Applicable in both single-thread and multi-thread environments.
         """
         return self.token_bucket.get_token()
+
+    def get_token_len(self, prompt: str) -> int:
+        """Get lengths of the tokenized string. Only English and Chinese
+        characters are counted for now. Users are encouraged to override this
+        method if more accurate length is needed.
+
+        Args:
+            prompt (str): Input string.
+
+        Returns:
+            int: Length of the input tokens
+        """
+        english_parts = re.findall(r'[A-Za-z0-9]+', prompt)
+        chinese_parts = re.findall(r'[\u4e00-\u9FFF]+', prompt)
+
+        # Count English words
+        english_count = sum(len(part.split()) for part in english_parts)
+
+        # Count Chinese words
+        chinese_count = sum(len(part) for part in chinese_parts)
+
+        return english_count + chinese_count
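The added get_token_len is a rough heuristic: each run of ASCII letters or digits counts as one token, and each CJK character in the \u4e00-\u9FFF range counts as one token. A minimal standalone sketch of that counting logic for reference (the rough_token_len name and the sample prompt are illustrative, not part of the commit):

import re


def rough_token_len(prompt: str) -> int:
    """Approximate length: one per ASCII alphanumeric run, one per CJK char."""
    english_parts = re.findall(r'[A-Za-z0-9]+', prompt)
    chinese_parts = re.findall(r'[\u4e00-\u9FFF]+', prompt)
    english_count = sum(len(part.split()) for part in english_parts)
    chinese_count = sum(len(part) for part in chinese_parts)
    return english_count + chinese_count


# 'Hello' and 'world' are two alphanumeric runs, '你好' is two CJK characters.
print(rough_token_len('Hello world 你好'))  # -> 4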
opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py

@@ -108,9 +108,9 @@ class PPLInferencer(BaseInferencer):
                     ice_template=ice_template,
                     prompt_template=prompt_template,
                     remain_sep=normalizing_str is not None)
-                prompt_token_num = self.model.get_token_len_from_template(
-                    prompt, mode='ppl')
                 if self.max_seq_len is not None:
+                    prompt_token_num = self.model.get_token_len_from_template(
+                        prompt, mode='ppl')
                     while len(ice_idx_list[idx]
                               ) > 0 and prompt_token_num > self.max_seq_len:
                         ice_idx_list[idx] = ice_idx_list[idx][:-1]
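Together with the get_token_len added above, this hunk makes the prompt-length measurement run only when max_seq_len is configured; the truncation loop then keeps dropping trailing in-context examples until the prompt fits. A minimal sketch of that truncate-until-it-fits pattern in isolation (truncate_ice, the whitespace token counter, and the sample data are hypothetical stand-ins, not OpenCompass APIs):

from typing import Callable, List, Optional


def truncate_ice(examples: List[str],
                 question: str,
                 token_len: Callable[[str], int],
                 max_seq_len: Optional[int]) -> str:
    """Drop trailing in-context examples until the prompt fits max_seq_len."""
    kept = list(examples)
    prompt = '\n'.join(kept + [question])
    if max_seq_len is not None:
        # Mirrors the fixed control flow: only measure when a limit is set.
        while len(kept) > 0 and token_len(prompt) > max_seq_len:
            kept = kept[:-1]
            prompt = '\n'.join(kept + [question])
    return prompt


# With a whitespace token counter and max_seq_len=8, the second example is
# dropped so the prompt fits.
print(truncate_ice(['Q: 1+1? A: 2', 'Q: 2+2? A: 4'],
                   'Q: 3+3? A:',
                   lambda s: len(s.split()),
                   max_seq_len=8))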