OpenDAS / opencompass · Commits

Commit 36f11110
authored Jul 05, 2023 by cky, committed Jul 05, 2023 by gaotong

update datasets

parent 3cfe73de
Changes: 31 · Showing 11 changed files with 176 additions and 0 deletions (+176 -0)
configs/models/classic/hf_llama.py  +22 -0
configs/summarizers/groups/flores.py  +25 -0
docs/en/prompt/meta_template.md  +1 -0
docs/en/user_guides/config.md  +2 -0
docs/zh_cn/_templates/404.html  +18 -0
docs/zh_cn/prompt/prompt_template.md  +1 -0
docs/zh_cn/user_guides/models.md  +1 -0
opencompass/datasets/cmnli.py  +27 -0
opencompass/datasets/xlsum.py  +33 -0
opencompass/datasets/xsum.py  +36 -0
opencompass/openicl/icl_evaluator/icl_base_evaluator.py  +10 -0
configs/models/classic/hf_llama.py (new file, mode 100644)

from opencompass.models import HuggingFaceCausalLM

models = [
    # LLaMA 7B
    dict(
        type=HuggingFaceCausalLM,
        path="decapoda-research/llama-7b-hf",
        tokenizer_path='decapoda-research/llama-7b-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(device_map='auto'),
        batch_padding=False,  # if false, inference with for-loop without batch padding
        run_cfg=dict(num_gpus=2, num_procs=1),
    )
]
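A model config like this is not run on its own; it is usually pulled into an evaluation entry config together with one or more dataset configs. The snippet below is a minimal sketch assuming the standard OpenCompass/MMEngine read_base() pattern; the entry-config filename and the siqa dataset config it imports are hypothetical and not part of this commit.

# configs/eval_llama_demo.py -- hypothetical entry config, illustrative only.
from mmengine.config import read_base

with read_base():
    # Reuse the model list defined above in configs/models/classic/hf_llama.py.
    from .models.classic.hf_llama import models
    # Hypothetical dataset config; any dataset config shipped with OpenCompass fits here.
    from .datasets.siqa.siqa_gen import siqa_datasets

datasets = [*siqa_datasets]

An entry config of this shape would then typically be launched with python run.py configs/eval_llama_demo.py.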
configs/summarizers/groups/flores.py (new file, mode 100644)

flores_summary_groups = []

_flores_lang_map = {
    'Indo-European-Germanic': ['afr', 'dan', 'deu', 'isl', 'ltz', 'nld', 'nob', 'swe'],
    'Indo-European-Romance': ['ast', 'cat', 'fra', 'glg', 'oci', 'por', 'ron', 'spa'],
    'Indo-European-Slavic': ['bel', 'bos', 'bul', 'ces', 'hrv', 'mkd', 'pol', 'rus', 'slk', 'slv', 'srp', 'ukr'],
    'Indo-European-Indo-Aryan': ['asm', 'ben', 'guj', 'hin', 'mar', 'npi', 'ory', 'pan', 'snd', 'urd'],
    'Indo-European-Other': ['ckb', 'cym', 'ell', 'fas', 'gle', 'hye', 'ita', 'lav', 'lit', 'pus', 'tgk'],
    'Austronesian': ['ceb', 'ind', 'jav', 'mri', 'msa', 'tgl'],
    'Atlantic-Congo': ['ibo', 'kam', 'kea', 'lin', 'lug', 'nso', 'nya', 'sna', 'swh', 'umb', 'wol', 'xho', 'yor', 'zul'],
    'Afro-Asiatic': ['amh', 'ara', 'ful', 'mlt', 'orm', 'som'],
    'Turkic': ['azj', 'kaz', 'kir', 'tur', 'uzb'],
    'Dravidian': ['kan', 'mal', 'tam', 'tel'],
    'Sino-Tibetan': ['mya', 'zho_simpl', 'zho_trad'],
    'Other': ['est', 'fin', 'hau', 'heb', 'hun', 'jpn', 'kat', 'khm', 'kor', 'lao', 'luo', 'mon', 'tha', 'vie'],
}

for _lang_serie in _flores_lang_map:
    flores_summary_groups.append({
        'name': f'flores_100_{_lang_serie}_English',
        'subsets': [f'flores_100_{lang_name}-eng' for lang_name in _flores_lang_map[_lang_serie]]
    })
    flores_summary_groups.append({
        'name': f'flores_100_English_{_lang_serie}',
        'subsets': [f'flores_100_eng-{lang_name}' for lang_name in _flores_lang_map[_lang_serie]]
    })
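To make the resulting structure concrete, each key of _flores_lang_map contributes one into-English group and one from-English group. For the 'Dravidian' key, for instance, the loop appends the following two entries to flores_summary_groups:

# Entries appended for the 'Dravidian' key:
{'name': 'flores_100_Dravidian_English',
 'subsets': ['flores_100_kan-eng', 'flores_100_mal-eng',
             'flores_100_tam-eng', 'flores_100_tel-eng']}
{'name': 'flores_100_English_Dravidian',
 'subsets': ['flores_100_eng-kan', 'flores_100_eng-mal',
             'flores_100_eng-tam', 'flores_100_eng-tel']}

The summarizer can then report one aggregated score per language family and direction instead of one score per language pair.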
docs/en/prompt/meta_template.md (new file, mode 100644)
# Meta-Prompt
\ No newline at end of file
docs/en/user_guides/config.md (new file, mode 100644)
# Learn About Config
docs/zh_cn/_templates/404.html (new file, mode 100644)

{% extends "layout.html" %}

{% block body %}

<h1>Page Not Found</h1>

<p>The page you are looking for cannot be found.</p>

<p>
  If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in
  the content table left, or go to <a href="{{ pathto(root_doc) }}">the homepage</a>.
</p>

<!-- <p>
  If you cannot find documentation you want, please <a href="">open an issue</a> to tell us!
</p> -->

{% endblock %}
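For this template to end up in the built documentation, the Sphinx configuration has to pick up the _templates directory and emit a 404 page from it. A minimal sketch, assuming the standard Sphinx options templates_path and html_additional_pages; the conf.py excerpt is illustrative and not part of this commit.

# docs/zh_cn/conf.py (excerpt) -- illustrative assumption, not part of this commit.
templates_path = ['_templates']              # where custom templates such as 404.html are looked up
html_additional_pages = {'404': '404.html'}  # render the template as an extra 404.html output page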
docs/zh_cn/prompt/prompt_template.md (new file, mode 100644)
# Prompt 模板
\ No newline at end of file
docs/zh_cn/user_guides/models.md (new file, mode 100644)
# 准备模型
opencompass/datasets/cmnli.py (new file, mode 100644)

import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class cmnliDataset_V2(BaseDataset):

    @staticmethod
    def load(path):
        data = []
        with open(path, 'r') as f:
            for line in f:
                line = json.loads(line)
                if line['label'] == '-':
                    continue
                line['label'] = {
                    'entailment': 'A',
                    'contradiction': 'B',
                    'neutral': 'C',
                }[line['label']]
                data.append(line)
        return Dataset.from_list(data)
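As a quick sanity check of the label remapping, here is how a single record would flow through load(); the field names in the example line are an assumption about the CLUE-style CMNLI jsonl format, which is not itself shown in this commit.

import json

# Hypothetical CMNLI jsonl record (field names assumed).
line = json.loads('{"sentence1": "...", "sentence2": "...", "label": "entailment"}')
# The same mapping used in cmnliDataset_V2.load() turns the label into an option letter.
line['label'] = {'entailment': 'A', 'contradiction': 'B', 'neutral': 'C'}[line['label']]
print(line['label'])  # -> 'A'; records labelled '-' are skipped before this point.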
opencompass/datasets/xlsum.py (new file, mode 100644)
from datasets import concatenate_datasets, load_dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class XLSUMDataset(BaseDataset):

    @staticmethod
    def load(**kwargs):
        path = kwargs.get('path', None)
        lans = [
            'oromo', 'french', 'amharic', 'arabic', 'azerbaijani', 'bengali',
            'burmese', 'chinese_simplified', 'chinese_traditional', 'welsh',
            'english', 'kirundi', 'gujarati', 'hausa', 'hindi', 'igbo',
            'indonesian', 'japanese', 'korean', 'kyrgyz', 'marathi',
            'spanish', 'scottish_gaelic', 'nepali', 'pashto', 'persian',
            'pidgin', 'portuguese', 'punjabi', 'russian', 'serbian_cyrillic',
            'serbian_latin', 'sinhala', 'somali', 'swahili', 'tamil',
            'telugu', 'thai', 'tigrinya', 'turkish', 'ukrainian', 'urdu',
            'uzbek', 'vietnamese', 'yoruba'
        ]

        datasets = []
        for lan in lans:
            dataset = load_dataset(path, lan)['validation']
            datasets.append(dataset)

        combined_dataset = concatenate_datasets(datasets)
        return combined_dataset
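Whatever path the caller provides is forwarded to load_dataset(path, lan), so a dataset config would normally point it at a Hugging Face hub id. A minimal sketch of such an entry; the csebuetnlp/xlsum id is an assumption and is not pinned anywhere in this commit.

# Hypothetical dataset config entry -- illustrative only.
xlsum_datasets = [
    dict(
        type=XLSUMDataset,        # the loader class defined above
        path='csebuetnlp/xlsum',  # assumed HF hub id; forwarded to load_dataset(path, lan)
    )
]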
opencompass/datasets/xsum.py (new file, mode 100644)
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS

from .base import BaseDataset


@LOAD_DATASET.register_module()
class XsumDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        with open(path, 'r', errors='ignore') as in_f:
            rows = []
            for i, line in enumerate(in_f):
                if i == 1000:
                    break
                sample = json.loads(line.strip())
                dialogue = sample['dialogue']
                summary = sample['summary']
                if isinstance(dialogue, float) or isinstance(summary, float):
                    continue
                rows.append({'dialogue': dialogue, 'summary': summary})
        dataset = Dataset.from_dict({
            'dialogue': [row['dialogue'] for row in rows],
            'summary': [row['summary'] for row in rows]
        })
        return dataset


@TEXT_POSTPROCESSORS.register_module('Xsum')
def Xsum_postprocess(text: str) -> str:
    text = text.strip().split('\n')[0].strip()
    return text
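Xsum_postprocess keeps only the first line of the model's stripped output, a common way to cut off run-on generations in summarization prompts. For example (the sample text is made up):

raw = '  A man has been arrested after the fire.\nAnd the model keeps talking here...'
print(Xsum_postprocess(raw))  # -> 'A man has been arrested after the fire.'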
opencompass/openicl/icl_evaluator/icl_base_evaluator.py (new file, mode 100644)

"""Base Evaluator."""
from typing import List


class BaseEvaluator:

    def __init__(self) -> None:
        pass

    def score(self):
        raise NotImplementedError("Method hasn't been implemented yet")
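Concrete evaluators are expected to subclass BaseEvaluator and override score(). The sketch below shows what such a subclass could look like; the score(predictions, references) signature and the metric-dict return value are assumptions about how the surrounding framework calls evaluators, not something fixed by this file.

from typing import List

class ExactMatchEvaluator(BaseEvaluator):
    """Hypothetical evaluator: percentage of predictions that match their reference."""

    def score(self, predictions: List[str], references: List[str]) -> dict:
        assert len(predictions) == len(references)
        correct = sum(p.strip() == r.strip() for p, r in zip(predictions, references))
        return {'accuracy': 100 * correct / max(len(references), 1)}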