Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0126f6d1
Unverified
Commit
0126f6d1
authored
May 15, 2025
by
Baber Abbasi
Committed by
GitHub
May 15, 2025
Browse files
fix formatting (#2759)
parent
96966f53
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
48 additions
and
0 deletions
+48
-0
lm_eval/tasks/paws-x/paws_de.yaml
lm_eval/tasks/paws-x/paws_de.yaml
+1
-0
lm_eval/tasks/paws-x/paws_en.yaml
lm_eval/tasks/paws-x/paws_en.yaml
+1
-0
lm_eval/tasks/paws-x/paws_es.yaml
lm_eval/tasks/paws-x/paws_es.yaml
+1
-0
lm_eval/tasks/paws-x/paws_fr.yaml
lm_eval/tasks/paws-x/paws_fr.yaml
+1
-0
lm_eval/tasks/paws-x/pawsx_template_yaml
lm_eval/tasks/paws-x/pawsx_template_yaml
+1
-0
lm_eval/tasks/paws-x/utils.py
lm_eval/tasks/paws-x/utils.py
+43
-0
No files found.
lm_eval/tasks/paws-x/paws_de.yaml
View file @
0126f6d1
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", richtig? Nein, "+sentence2, sentence1+", richtig
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", richtig? Nein, "+sentence2, sentence1+", richtig
"+sentence2]}}'
"+sentence2]}}'
doc_to_text
:
'
'
doc_to_text
:
'
'
include
:
pawsx_template_yaml
include
:
pawsx_template_yaml
process_docs
:
!function
utils.process_docs_paraphrases
task
:
paws_de
task
:
paws_de
lm_eval/tasks/paws-x/paws_en.yaml
View file @
0126f6d1
...
@@ -3,4 +3,5 @@ dataset_name: en
...
@@ -3,4 +3,5 @@ dataset_name: en
doc_to_choice
:
'
{{[sentence1+",
right?
No,
"+sentence2,
sentence1+",
right?
Yes,
"+sentence2]}}'
doc_to_choice
:
'
{{[sentence1+",
right?
No,
"+sentence2,
sentence1+",
right?
Yes,
"+sentence2]}}'
doc_to_text
:
'
'
doc_to_text
:
'
'
include
:
pawsx_template_yaml
include
:
pawsx_template_yaml
process_docs
:
!function
utils.process_docs_paraphrases
task
:
paws_en
task
:
paws_en
lm_eval/tasks/paws-x/paws_es.yaml
View file @
0126f6d1
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", verdad? No, "+sentence2, sentence1+", verdad? S
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", verdad? No, "+sentence2, sentence1+", verdad? S
"+sentence2]}}'
"+sentence2]}}'
doc_to_text
:
'
'
doc_to_text
:
'
'
include
:
pawsx_template_yaml
include
:
pawsx_template_yaml
process_docs
:
!function
utils.process_docs_paraphrases
task
:
paws_es
task
:
paws_es
lm_eval/tasks/paws-x/paws_fr.yaml
View file @
0126f6d1
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", n''est-ce pas? Non, "+sentence2, sentence1+", n'
...
@@ -4,4 +4,5 @@ doc_to_choice: '{{[sentence1+", n''est-ce pas? Non, "+sentence2, sentence1+", n'
pas?
Oui,
"+sentence2]}}'
pas?
Oui,
"+sentence2]}}'
doc_to_text
:
'
'
doc_to_text
:
'
'
include
:
pawsx_template_yaml
include
:
pawsx_template_yaml
process_docs
:
!function
utils.process_docs_paraphrases
task
:
paws_fr
task
:
paws_fr
lm_eval/tasks/paws-x/pawsx_template_yaml
View file @
0126f6d1
...
@@ -11,6 +11,7 @@ test_split: test
...
@@ -11,6 +11,7 @@ test_split: test
doc_to_text: null
doc_to_text: null
doc_to_target: label
doc_to_target: label
doc_to_choice: null
doc_to_choice: null
target_delimiter: ""
metric_list:
metric_list:
- metric: acc
- metric: acc
aggregation: mean
aggregation: mean
...
...
lm_eval/tasks/paws-x/utils.py
0 → 100644
View file @
0126f6d1
import
re
def general_detokenize(string):
    """Re-attach tokens that were split off by whitespace tokenization.

    The literal substitutions are applied first, in the order listed, and
    a final regex pass removes a single space that precedes an apostrophe,
    full stop or comma.

    Args:
        string: The tokenized text to clean up.

    Returns:
        The text with the spacing fixes applied.
    """
    # Order matters: each pair is applied to the result of the previous one.
    literal_fixes = (
        (" n't", "n't"),
        (" )", ")"),
        ("( ", "("),
        ('" ', '"'),
        (' "', '"'),
    )
    for spaced, joined in literal_fixes:
        string = string.replace(spaced, joined)
    # Drop the space in front of ' . , (e.g. "dog 's" -> "dog's").
    return re.sub(r" (['.,])", r"\1", string)
def lowercase_first_letter(text):
    """Return ``text`` with its first character converted to lowercase.

    Slices are used instead of ``text[0]`` so that an empty string is
    returned unchanged rather than raising ``IndexError``.

    Args:
        text: The string to transform.

    Returns:
        ``text`` with only its first character lower-cased; ``""`` when
        ``text`` is empty.
    """
    return text[:1].lower() + text[1:]
def process_docs_paraphrases(dataset):
    """Drop incomplete sentence pairs and normalise the remaining ones.

    Documents whose ``sentence1`` or ``sentence2`` is ``None`` or ``""`` are
    filtered out. Each remaining document has both sentences detokenized and
    stripped, a trailing ``.``, ``,`` or ``;`` removed from ``sentence1``,
    and the first letter of ``sentence2`` lower-cased.

    If any documents were dropped, a summary listing them is printed. The
    previous version checked for dropped documents before any document had
    been processed, so the summary could never be printed.

    Args:
        dataset: Object providing ``filter()``, ``map()``, ``len()`` and
            iteration over dict-like docs (presumably a Hugging Face
            ``datasets.Dataset`` -- confirm against the caller).

    Returns:
        The result of ``dataset.filter(...).map(...)``.
    """

    def _has_both_sentences(doc):
        # A doc is usable only when neither sentence is missing or empty.
        return doc["sentence1"] not in [None, ""] and doc["sentence2"] not in [
            None,
            "",
        ]

    def _process_doc(doc):
        doc["sentence1"] = general_detokenize(doc["sentence1"]).strip()
        doc["sentence2"] = general_detokenize(doc["sentence2"]).strip()
        # Remove final punctuation mark in the first sentence
        if doc["sentence1"].endswith((".", ",", ";")):
            doc["sentence1"] = doc["sentence1"][:-1]
        # Start the second sentence in lowercase (to be used after "Yes, ...")
        doc["sentence2"] = lowercase_first_letter(doc["sentence2"])
        return doc

    kept = dataset.filter(_has_both_sentences)

    # Only walk the dataset to collect the dropped docs when the length
    # difference shows that the filter actually removed something.
    if len(kept) != len(dataset):
        empty_docs = [doc for doc in dataset if not _has_both_sentences(doc)]
        print(
            f"Found {len(empty_docs)} empty documents out of the {len(dataset)} total docs in the dataset: {empty_docs}"
        )

    return kept.map(_process_doc)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment