Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
c0acd06b
Unverified
Commit
c0acd06b
authored
Nov 16, 2023
by
Raymond Zhang
Committed by
GitHub
Nov 16, 2023
Browse files
[Feature] Add FinanceIQ dataset (#596)
parent
8160cb84
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
201 additions
and
0 deletions
+201
-0
configs/datasets/FinanceIQ/FinanceIQ_gen.py
configs/datasets/FinanceIQ/FinanceIQ_gen.py
+4
-0
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
+77
-0
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
+4
-0
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
+76
-0
opencompass/datasets/FinanceIQ.py
opencompass/datasets/FinanceIQ.py
+39
-0
opencompass/datasets/__init__.py
opencompass/datasets/__init__.py
+1
-0
No files found.
configs/datasets/FinanceIQ/FinanceIQ_gen.py
0 → 100644
View file @
c0acd06b
from
mmengine.config
import
read_base
with read_base():
    # Re-export the dataset list from the pinned generation config.
    # That config names its list ``financeIQ_datasets`` (lowercase "f"), so
    # the imported name has to use the same spelling; the previous
    # ``FinanceIQ_datasets`` did not match any variable defined there.
    from .FinanceIQ_gen_e0e6b5 import financeIQ_datasets  # noqa: F401, F403
configs/datasets/FinanceIQ/FinanceIQ_gen_e0e6b5.py
0 → 100644
View file @
c0acd06b
from
opencompass.openicl.icl_prompt_template
import
PromptTemplate
from
opencompass.openicl.icl_retriever
import
FixKRetriever
from
opencompass.openicl.icl_inferencer
import
GenInferencer
from
opencompass.openicl.icl_evaluator
import
AccEvaluator
from
opencompass.datasets
import
FinanceIQDataset
from
opencompass.utils.text_postprocessors
import
first_capital_postprocess
# English exam identifiers paired with the Chinese subject names.
# Not referenced by the loop below; kept as a module-level lookup table.
financeIQ_subject_mapping_en = {
    'certified_public_accountant': '注册会计师(CPA)',
    'banking_qualification': '银行从业资格',
    'securities_qualification': '证券从业资格',
    'fund_qualification': '基金从业资格',
    'insurance_qualification': '保险从业资格CICE',
    'economic_analyst': '经济师',
    'taxation_practitioner': '税务师',
    'futures_qualification': '期货从业资格',
    'certified_fin_planner': '理财规划师',
    'actuary_fin_math': '精算师-金融数学',
}

# Subset name (passed to the dataset as ``name``) -> subject name that is
# interpolated into the prompt. Keys and values are identical here.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = list(financeIQ_subject_mapping)

# One generation-style dataset config is appended per subject.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            'template': {
                'begin': '</E>',
                'round': [
                    {
                        'role': 'HUMAN',
                        # Only the first segment is an f-string; the rest is
                        # a plain literal, so its single braces stay as
                        # template placeholders in the final prompt.
                        'prompt': (
                            f'以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。'
                            '\n题目:{question}'
                            '\nA. {A}'
                            '\nB. {B}'
                            '\nC. {C}'
                            '\nD. {D}'
                        ),
                    },
                    {'role': 'BOT', 'prompt': '答案是: {answer}'},
                ],
            },
            'ice_token': '</E>',
        },
        # In-context examples are always the entries with these fixed ids.
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': GenInferencer},
    }

    financeIQ_eval_cfg = {
        'evaluator': {'type': AccEvaluator},
        'pred_postprocessor': {'type': first_capital_postprocess},
    }

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': './data/FinanceIQ/',
        'name': _name,
        'abbr': f'FinanceIQ-{_name}',
        'reader_cfg': {
            'input_columns': ['question', 'A', 'B', 'C', 'D'],
            'output_column': 'answer',
            'train_split': 'dev',
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Remove the loop temporaries from the module namespace.
del _name, _ch_name
configs/datasets/FinanceIQ/FinanceIQ_ppl.py
0 → 100644
View file @
c0acd06b
from
mmengine.config
import
read_base
with read_base():
    # Re-export the dataset list from the pinned perplexity config.
    # That config names its list ``financeIQ_datasets`` (lowercase "f"), so
    # the imported name has to use the same spelling; the previous
    # ``FinanceIQ_datasets`` did not match any variable defined there.
    from .FinanceIQ_ppl_42b9bd import financeIQ_datasets  # noqa: F401, F403
configs/datasets/FinanceIQ/FinanceIQ_ppl_42b9bd.py
0 → 100644
View file @
c0acd06b
from
opencompass.openicl.icl_prompt_template
import
PromptTemplate
from
opencompass.openicl.icl_retriever
import
FixKRetriever
from
opencompass.openicl.icl_inferencer
import
PPLInferencer
from
opencompass.openicl.icl_evaluator
import
AccEvaluator
from
opencompass.datasets
import
FinanceIQDataset
# English exam identifiers paired with the Chinese subject names.
# Not referenced by the loop below; kept as a module-level lookup table.
financeIQ_subject_mapping_en = {
    'certified_public_accountant': '注册会计师(CPA)',
    'banking_qualification': '银行从业资格',
    'securities_qualification': '证券从业资格',
    'fund_qualification': '基金从业资格',
    'insurance_qualification': '保险从业资格CICE',
    'economic_analyst': '经济师',
    'taxation_practitioner': '税务师',
    'futures_qualification': '期货从业资格',
    'certified_fin_planner': '理财规划师',
    'actuary_fin_math': '精算师-金融数学',
}

# Subset name (passed to the dataset as ``name``) -> subject name that is
# interpolated into the prompt. Keys and values are identical here.
financeIQ_subject_mapping = {
    '注册会计师(CPA)': '注册会计师(CPA)',
    '银行从业资格': '银行从业资格',
    '证券从业资格': '证券从业资格',
    '基金从业资格': '基金从业资格',
    '保险从业资格CICE': '保险从业资格CICE',
    '经济师': '经济师',
    '税务师': '税务师',
    '期货从业资格': '期货从业资格',
    '理财规划师': '理财规划师',
    '精算师-金融数学': '精算师-金融数学',
}

financeIQ_all_sets = list(financeIQ_subject_mapping)

# One perplexity-style dataset config is appended per subject.
financeIQ_datasets = []
for _name, _ch_name in financeIQ_subject_mapping.items():
    financeIQ_infer_cfg = {
        'ice_template': {
            'type': PromptTemplate,
            # One full dialogue per candidate answer, keyed by that answer.
            # A comprehension is used so ``answer`` stays out of the module
            # namespace.
            'template': {
                answer: {
                    'begin': '</E>',
                    'round': [
                        {
                            'role': 'HUMAN',
                            # Only the first segment is an f-string; the
                            # rest is a plain literal, so its single braces
                            # stay as template placeholders.
                            'prompt': (
                                f'以下是关于{_ch_name}的单项选择题,请直接给出正确答案的选项。'
                                '\n题目:{question}'
                                '\nA. {A}'
                                '\nB. {B}'
                                '\nC. {C}'
                                '\nD. {D}'
                            ),
                        },
                        # Here the candidate letter is baked in directly.
                        {'role': 'BOT', 'prompt': f'答案是: {answer}'},
                    ],
                }
                for answer in ('A', 'B', 'C', 'D')
            },
            'ice_token': '</E>',
        },
        # In-context examples are always the entries with these fixed ids.
        'retriever': {'type': FixKRetriever, 'fix_id_list': [0, 1, 2, 3, 4]},
        'inferencer': {'type': PPLInferencer},
    }

    financeIQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}

    financeIQ_datasets.append({
        'type': FinanceIQDataset,
        'path': './data/FinanceIQ/',
        'name': _name,
        'abbr': f'FinanceIQ-{_name}',
        'reader_cfg': {
            'input_columns': ['question', 'A', 'B', 'C', 'D'],
            'output_column': 'answer',
            'train_split': 'dev',
            'test_split': 'test',
        },
        'infer_cfg': financeIQ_infer_cfg,
        'eval_cfg': financeIQ_eval_cfg,
    })

# Remove the loop temporaries from the module namespace.
del _name, _ch_name
opencompass/datasets/FinanceIQ.py
0 → 100644
View file @
c0acd06b
import
csv
import
os.path
as
osp
from
datasets
import
Dataset
,
DatasetDict
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class FinanceIQDataset(BaseDataset):
    """Loader for the FinanceIQ multiple-choice CSV files.

    For a subset ``name`` the files ``<path>/dev/<name>.csv`` and
    ``<path>/test/<name>.csv`` are read. Each file starts with a header row,
    followed by rows of exactly seven columns; the first column is ignored
    and the remaining six are taken, in order, as the question, the options
    A-D and the answer.
    """

    @staticmethod
    def load(path: str, name: str) -> DatasetDict:
        """Read the ``dev`` and ``test`` splits of one FinanceIQ subset.

        Args:
            path: Directory that contains the ``dev`` and ``test`` folders.
            name: Subset name, i.e. the CSV file name without ``.csv``.

        Returns:
            A ``DatasetDict`` with the keys ``'dev'`` and ``'test'``. Every
            record has the string fields ``question``, ``A``, ``B``, ``C``,
            ``D`` and ``answer``.

        Raises:
            ValueError: If a data row does not have exactly seven columns.
        """
        dataset = DatasetDict()
        for split in ['dev', 'test']:
            raw_data = []
            filename = osp.join(path, split, f'{name}.csv')
            # The csv module asks for newline='' so that it can handle line
            # endings (including ones inside quoted fields) on its own.
            with open(filename, encoding='utf-8', newline='') as csv_file:
                reader = csv.reader(csv_file)
                next(reader)  # skip the header
                for row in reader:
                    # Explicit check instead of ``assert``: it still runs
                    # under ``python -O`` and reports where the bad row is.
                    if len(row) != 7:
                        raise ValueError(
                            f'{filename}, line {reader.line_num}: expected '
                            f'7 columns but found {len(row)}')
                    raw_data.append({
                        'question': row[1],
                        'A': row[2],
                        'B': row[3],
                        'C': row[4],
                        'D': row[5],
                        'answer': row[6],
                    })
            dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/__init__.py
View file @
c0acd06b
...
@@ -30,6 +30,7 @@ from .drop import * # noqa: F401, F403
...
@@ -30,6 +30,7 @@ from .drop import * # noqa: F401, F403
from
.ds1000
import
*
# noqa: F401, F403
from
.ds1000
import
*
# noqa: F401, F403
from
.ds1000_interpreter
import
*
# noqa: F401, F403
from
.ds1000_interpreter
import
*
# noqa: F401, F403
from
.eprstmt
import
*
# noqa: F401, F403
from
.eprstmt
import
*
# noqa: F401, F403
from
.FinanceIQ
import
*
# noqa: F401, F403
from
.flores
import
*
# noqa: F401, F403
from
.flores
import
*
# noqa: F401, F403
from
.game24
import
*
# noqa: F401, F403
from
.game24
import
*
# noqa: F401, F403
from
.GaokaoBench
import
*
# noqa: F401, F403
from
.GaokaoBench
import
*
# noqa: F401, F403
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment