Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
314f7176
Unverified
Commit
314f7176
authored
Jul 23, 2025
by
Baber Abbasi
Committed by
GitHub
Jul 23, 2025
Browse files
remove trust-remote-code in configs; fix escape sequences (#3180)
* remove trust-remote-code * add W605 rule
parent
8c6fde08
Changes
98
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
6 additions
and
42 deletions
+6
-42
lm_eval/tasks/hrm8k/default/utils.py
lm_eval/tasks/hrm8k/default/utils.py
+3
-3
lm_eval/tasks/hrm8k/en/utils.py
lm_eval/tasks/hrm8k/en/utils.py
+3
-3
lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml
...erse_scaling/inverse_scaling_winobias_antistereotype.yaml
+0
-2
lm_eval/tasks/kobest/kobest_sentineg.yaml
lm_eval/tasks/kobest/kobest_sentineg.yaml
+0
-2
lm_eval/tasks/kobest/kobest_wic.yaml
lm_eval/tasks/kobest/kobest_wic.yaml
+0
-2
lm_eval/tasks/kormedmcqa/_template_yaml
lm_eval/tasks/kormedmcqa/_template_yaml
+0
-2
lm_eval/tasks/lambada/lambada_openai.yaml
lm_eval/tasks/lambada/lambada_openai.yaml
+0
-2
lm_eval/tasks/leaderboard/math/_template_yaml
lm_eval/tasks/leaderboard/math/_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml
...al/tasks/llama3/instruct/mmlu/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_cot/_mmlu_cot_llama_template_yaml
...ks/llama3/instruct/mmlu_cot/_mmlu_cot_llama_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_de/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_de/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_es/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_es/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_fr/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_fr/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_hi/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_hi/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_it/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_it/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml
...val/tasks/llama3/instruct/mmlu_pro/_default_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_pt/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_pt/_continuation_template_yaml
+0
-2
lm_eval/tasks/llama3/instruct/mmlu_th/_continuation_template_yaml
...tasks/llama3/instruct/mmlu_th/_continuation_template_yaml
+0
-2
lm_eval/tasks/logiqa/logiqa.yaml
lm_eval/tasks/logiqa/logiqa.yaml
+0
-2
lm_eval/tasks/logiqa2/logieval.yaml
lm_eval/tasks/logiqa2/logieval.yaml
+0
-2
No files found.
lm_eval/tasks/hrm8k/default/utils.py
View file @
314f7176
...
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
...
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
return
retval
return
retval
def
get_answer_with_dollar_sign
(
s
):
def
get_answer_with_dollar_sign
(
s
):
first_pattern
=
"\$(.*)\$"
first_pattern
=
r
"\$(.*)\$"
last_match
=
None
last_match
=
None
matches
=
re
.
findall
(
first_pattern
,
s
)
matches
=
re
.
findall
(
first_pattern
,
s
)
if
matches
:
if
matches
:
...
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
...
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
if
"
\\
n"
in
last_match
:
if
"
\\
n"
in
last_match
:
last_match
=
last_match
.
split
(
"
\\
n"
)[
0
]
last_match
=
last_match
.
split
(
"
\\
n"
)[
0
]
else
:
else
:
pattern
=
"(?:
\\
$)?\d+(?:\
.
\d+)?(?![\
w
\d])"
pattern
=
"(?:
\\
$)?
\
\
d+(?:
\
\
.
\
\
d+)?(?![
\
\
w
\
\
d])"
matches
=
re
.
findall
(
pattern
,
s
)
matches
=
re
.
findall
(
pattern
,
s
)
if
matches
:
if
matches
:
last_match
=
matches
[
-
1
]
last_match
=
matches
[
-
1
]
...
@@ -250,7 +250,7 @@ def _strip_string(string):
...
@@ -250,7 +250,7 @@ def _strip_string(string):
# remove percentage
# remove percentage
string
=
string
.
replace
(
"
\\
%"
,
""
)
string
=
string
.
replace
(
"
\\
%"
,
""
)
string
=
string
.
replace
(
"\%"
,
""
)
string
=
string
.
replace
(
r
"\%"
,
""
)
# " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
# " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
string
=
string
.
replace
(
" ."
,
" 0."
)
string
=
string
.
replace
(
" ."
,
" 0."
)
...
...
lm_eval/tasks/hrm8k/en/utils.py
View file @
314f7176
...
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
...
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
return
retval
return
retval
def
get_answer_with_dollar_sign
(
s
):
def
get_answer_with_dollar_sign
(
s
):
first_pattern
=
"\$(.*)\$"
first_pattern
=
r
"\$(.*)\$"
last_match
=
None
last_match
=
None
matches
=
re
.
findall
(
first_pattern
,
s
)
matches
=
re
.
findall
(
first_pattern
,
s
)
if
matches
:
if
matches
:
...
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
...
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
if
"
\\
n"
in
last_match
:
if
"
\\
n"
in
last_match
:
last_match
=
last_match
.
split
(
"
\\
n"
)[
0
]
last_match
=
last_match
.
split
(
"
\\
n"
)[
0
]
else
:
else
:
pattern
=
"(?:
\\
$)?\d+(?:\
.
\d+)?(?![\
w
\d])"
pattern
=
"(?:
\\
$)?
\
\
d+(?:
\
\
.
\
\
d+)?(?![
\
\
w
\
\
d])"
matches
=
re
.
findall
(
pattern
,
s
)
matches
=
re
.
findall
(
pattern
,
s
)
if
matches
:
if
matches
:
last_match
=
matches
[
-
1
]
last_match
=
matches
[
-
1
]
...
@@ -250,7 +250,7 @@ def _strip_string(string):
...
@@ -250,7 +250,7 @@ def _strip_string(string):
# remove percentage
# remove percentage
string
=
string
.
replace
(
"
\\
%"
,
""
)
string
=
string
.
replace
(
"
\\
%"
,
""
)
string
=
string
.
replace
(
"\%"
,
""
)
string
=
string
.
replace
(
r
"\%"
,
""
)
# " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
# " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
string
=
string
.
replace
(
" ."
,
" 0."
)
string
=
string
.
replace
(
" ."
,
" 0."
)
...
...
lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml
View file @
314f7176
...
@@ -14,7 +14,5 @@ metric_list:
...
@@ -14,7 +14,5 @@ metric_list:
-
metric
:
acc_norm
-
metric
:
acc_norm
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
dataset_kwargs
:
trust_remote_code
:
true
metadata
:
metadata
:
version
:
0
version
:
0
lm_eval/tasks/kobest/kobest_sentineg.yaml
View file @
314f7176
...
@@ -19,5 +19,3 @@ metric_list:
...
@@ -19,5 +19,3 @@ metric_list:
higher_is_better
:
True
higher_is_better
:
True
metadata
:
metadata
:
version
:
1.0
version
:
1.0
dataset_kwargs
:
trust_remote_code
:
true
lm_eval/tasks/kobest/kobest_wic.yaml
View file @
314f7176
...
@@ -19,5 +19,3 @@ metric_list:
...
@@ -19,5 +19,3 @@ metric_list:
higher_is_better
:
True
higher_is_better
:
True
metadata
:
metadata
:
version
:
1.0
version
:
1.0
dataset_kwargs
:
trust_remote_code
:
true
lm_eval/tasks/kormedmcqa/_template_yaml
View file @
314f7176
...
@@ -29,5 +29,3 @@ generation_kwargs:
...
@@ -29,5 +29,3 @@ generation_kwargs:
max_gen_toks: 1024
max_gen_toks: 1024
metadata:
metadata:
version: 2.0
version: 2.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/lambada/lambada_openai.yaml
View file @
314f7176
...
@@ -18,5 +18,3 @@ metric_list:
...
@@ -18,5 +18,3 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
metadata
:
metadata
:
version
:
1.0
version
:
1.0
dataset_kwargs
:
trust_remote_code
:
true
lm_eval/tasks/leaderboard/math/_template_yaml
View file @
314f7176
...
@@ -22,8 +22,6 @@ metric_list:
...
@@ -22,8 +22,6 @@ metric_list:
num_fewshot: 4
num_fewshot: 4
metadata:
metadata:
version: 3.0
version: 3.0
dataset_kwargs:
trust_remote_code: true
fewshot_config:
fewshot_config:
sampler: first_n
sampler: first_n
samples: !function utils.list_fewshot_samples
samples: !function utils.list_fewshot_samples
lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml
View file @
314f7176
...
@@ -29,5 +29,3 @@ filter_list:
...
@@ -29,5 +29,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_cot/_mmlu_cot_llama_template_yaml
View file @
314f7176
...
@@ -23,6 +23,4 @@ metric_list:
...
@@ -23,6 +23,4 @@ metric_list:
ignore_punctuation: true
ignore_punctuation: true
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
num_fewshot: 0
num_fewshot: 0
lm_eval/tasks/llama3/instruct/mmlu_de/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_es/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_fr/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_hi/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_it/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml
View file @
314f7176
...
@@ -31,5 +31,3 @@ filter_list:
...
@@ -31,5 +31,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_pt/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/llama3/instruct/mmlu_th/_continuation_template_yaml
View file @
314f7176
...
@@ -28,5 +28,3 @@ filter_list:
...
@@ -28,5 +28,3 @@ filter_list:
- function: take_first
- function: take_first
metadata:
metadata:
version: 1.0
version: 1.0
dataset_kwargs:
trust_remote_code: true
lm_eval/tasks/logiqa/logiqa.yaml
View file @
314f7176
...
@@ -19,5 +19,3 @@ metric_list:
...
@@ -19,5 +19,3 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
metadata
:
metadata
:
version
:
1.0
version
:
1.0
dataset_kwargs
:
trust_remote_code
:
true
lm_eval/tasks/logiqa2/logieval.yaml
View file @
314f7176
...
@@ -25,5 +25,3 @@ filter_list:
...
@@ -25,5 +25,3 @@ filter_list:
-
function
:
"
take_first"
-
function
:
"
take_first"
metadata
:
metadata
:
version
:
0.0
version
:
0.0
dataset_kwargs
:
trust_remote_code
:
true
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment