Unverified Commit 1e445417 authored by Tong Gao, committed by GitHub

[Enhancement] Test linting in CI and fix existing linting errors (#69)

* [Enhancement] Test linting in CI

* fix linting
parent 9a164489
@@ -2,4 +2,4 @@
 skip = *.ipynb
 count =
 quiet-level = 3
-ignore-words-list = nd, ans, ques
+ignore-words-list = nd, ans, ques, rouge
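(`rouge` is appended to the ignore list presumably because codespell would otherwise "correct" the ROUGE metric's name to "rogue".)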
name: lint
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: '3.10'  # quoted so YAML does not parse it as the float 3.1
- name: Install pre-commit hook
run: |
pip install pre-commit
pre-commit install
- name: Linting
run: pre-commit run --all-files
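The final `pre-commit run --all-files` step assumes a `.pre-commit-config.yaml` at the repository root, which this diff does not show. A minimal sketch of such a config covering the checks these fixes target (yapf formatting, flake8 style, isort import order, codespell); the pinned revisions below are illustrative assumptions, not the project's actual hooks:

# Hypothetical .pre-commit-config.yaml; revs are placeholders.
repos:
  - repo: https://github.com/google/yapf
    rev: v0.32.0
    hooks:
      - id: yapf
  - repo: https://github.com/PyCQA/flake8
    rev: 5.0.4
    hooks:
      - id: flake8
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
      - id: codespell

Locally, `pip install pre-commit` followed by `pre-commit run --all-files` reproduces what the CI job checks.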
@@ -29,14 +29,12 @@ author = 'OpenCompass Authors'
 # The full version, including alpha/beta/rc tags
 # version_file = '../../opencompass/version.py'
 # def get_version():
 #     with open(version_file, 'r') as f:
 #         exec(compile(f.read(), version_file, 'exec'))
 #     return locals()['__version__']

-release = "0.5.0"
+release = '0.5.0'

 # -- General configuration ---------------------------------------------------
@@ -141,14 +139,16 @@ latex_elements = {
 # (source start file, target name, title,
 # author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, 'manual'),
+    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author,
+     'manual'),
 ]

 # -- Options for manual page output ------------------------------------------

 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)]
+man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author],
+              1)]

 # -- Options for Texinfo output ----------------------------------------------
@@ -156,8 +156,9 @@ man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)
 # (source start file, target name, title, author,
 # dir menu entry, description, category)
 texinfo_documents = [
-    (root_doc, 'opencompass', 'OpenCompass Documentation', author, 'OpenCompass Authors',
-     'AGI evaluation toolbox and benchmark.', 'Miscellaneous'),
+    (root_doc, 'opencompass', 'OpenCompass Documentation', author,
+     'OpenCompass Authors', 'AGI evaluation toolbox and benchmark.',
+     'Miscellaneous'),
 ]

 # -- Options for Epub output -------------------------------------------------
...
@@ -12,19 +12,19 @@ class CEvalDataset(BaseDataset):

     @staticmethod
     def load(path: str, name: str):
-        dev_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'dev', f'{name}_dev.csv'),
-            split='train')
+        dev_dataset = load_dataset('csv',
+                                   data_files=osp.join(path, 'dev',
+                                                       f'{name}_dev.csv'),
+                                   split='train')
-        val_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'val', f'{name}_val.csv'),
-            split='train')
+        val_dataset = load_dataset('csv',
+                                   data_files=osp.join(path, 'val',
+                                                       f'{name}_val.csv'),
+                                   split='train')
         val_dataset = val_dataset.add_column('explanation',
                                              [''] * len(val_dataset))
-        test_dataset = load_dataset(
-            'csv',
-            data_files=osp.join(path, 'test', f'{name}_test.csv'),
-            split='train')
+        test_dataset = load_dataset('csv',
+                                    data_files=osp.join(
+                                        path, 'test', f'{name}_test.csv'),
+                                    split='train')
         test_dataset = test_dataset.add_column(
             'answer',
...
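A hedged usage sketch of the loader above (the data directory and subject name are illustrative assumptions; the loader expects dev/, val/ and test/ subdirectories of per-subject CSV files):

# Illustrative only: the path and subject name are assumptions.
ceval = CEvalDataset.load(path='data/ceval', name='computer_network')
# The val split gains an empty 'explanation' column; the test split
# (in the truncated lines above) gains a placeholder 'answer' column.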
@@ -16,8 +16,9 @@ class FloresFirst100Dataset(BaseDataset):
             'dev':
             load_dataset(path='facebook/flores', name=name, split='dev'),
             'devtest':
-            load_dataset(
-                path='facebook/flores', name=name, split='devtest[:100]')
+            load_dataset(path='facebook/flores',
+                         name=name,
+                         split='devtest[:100]')
         })
...
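`split='devtest[:100]'` uses the Hugging Face `datasets` split-slicing syntax: only the first 100 rows of the `devtest` split are loaded, which is what gives `FloresFirst100Dataset` its name. A standalone sketch (the language-pair config name is an illustrative assumption):

from datasets import load_dataset

# 'devtest[:100]' keeps rows 0-99 of the devtest split.
subset = load_dataset(path='facebook/flores',
                      name='eng_Latn-zho_Hans',  # illustrative pair
                      split='devtest[:100]')
assert len(subset) == 100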
@@ -3,6 +3,7 @@ import json
 from datasets import Dataset

 from opencompass.registry import LOAD_DATASET
+
 from .base import BaseDataset
...
@@ -142,7 +142,8 @@ class HuggingFace(BaseModel):
         tokens = self.tokenizer.batch_encode_plus(inputs,
                                                   padding=True,
                                                   truncation=True,
-                                                  max_length=self.max_seq_len - max_out_len)
+                                                  max_length=self.max_seq_len -
+                                                  max_out_len)
         tokens = {
             k: torch.tensor(np.array(tokens[k]), device=self.model.device)
             for k in tokens if k in ['input_ids', 'attention_mask']
@@ -180,10 +181,10 @@ class HuggingFace(BaseModel):
         input_ids = self.tokenizer(inputs,
                                    truncation=True,
-                                   max_length=self.max_seq_len - max_out_len)['input_ids']
+                                   max_length=self.max_seq_len -
+                                   max_out_len)['input_ids']
         input_ids = torch.tensor(input_ids, device=self.model.device)
-        outputs = self.model.generate(input_ids,
-                                      max_new_tokens=max_out_len)
+        outputs = self.model.generate(input_ids, max_new_tokens=max_out_len)
         if not self.extract_pred_after_decode:
             outputs = outputs[:, input_ids.shape[1]:]
...
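In both hunks the truncation budget `max_length=self.max_seq_len - max_out_len` reserves room in the model's context window for the `max_out_len` tokens to be generated; the final slice `outputs[:, input_ids.shape[1]:]` then strips the echoed prompt tokens so only newly generated tokens are decoded.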
-from .icl_dataset_reader import DatasetReader
-from .icl_evaluator import *
-from .icl_prompt_template import PromptTemplate
-from .icl_retriever import *
-from .icl_inferencer import *
+from .icl_dataset_reader import DatasetReader  # noqa
+from .icl_evaluator import *  # noqa
+from .icl_inferencer import *  # noqa
+from .icl_prompt_template import PromptTemplate  # noqa
+from .icl_retriever import *  # noqa
-from .icl_aucroc_evaluator import AUCROCEvaluator
-from .icl_base_evaluator import BaseEvaluator
-from .icl_em_evaluator import EMEvaluator
+from .icl_aucroc_evaluator import AUCROCEvaluator  # noqa
+from .icl_base_evaluator import BaseEvaluator  # noqa
+from .icl_em_evaluator import EMEvaluator  # noqa
 from .icl_hf_evaluator import *  # noqa
-from .icl_toxic_evaluator import ToxicEvaluator
+from .icl_toxic_evaluator import ToxicEvaluator  # noqa
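The trailing `# noqa` markers keep flake8 from flagging these re-exports as unused or star imports (F401/F403) now that linting runs on every push; isort additionally alphabetizes each block.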
 from typing import List

 import numpy as np
 from sklearn.metrics import roc_auc_score
...
"""Base Evaluator.""" """Base Evaluator."""
from typing import List
class BaseEvaluator: class BaseEvaluator:
def __init__(self) -> None: def __init__(self) -> None:
pass pass
......
 from typing import List

 import evaluate

 from opencompass.registry import ICL_EVALUATORS
@@ -54,9 +55,12 @@ class HuggingfaceEvaluator(BaseEvaluator):
             dict: calculated scores.
         """
         if len(predictions) != len(references):
-            return {'error': 'predictions and references have different '
-                    f'length. len(predictions): {len(predictions)}, '
-                    f'len(references): {len(references)}'}
+            return {
+                'error':
+                'predictions and references have different '
+                f'length. len(predictions): {len(predictions)}, '
+                f'len(references): {len(references)}'
+            }
         metric = evaluate.load(self.metric)
         scores = metric.compute(**self._preprocess(predictions, references))
         return self._postprocess(scores)
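The guarded call above relies on the standard Hugging Face `evaluate` API: `evaluate.load` fetches a metric by name and `compute` scores aligned prediction/reference lists. A minimal standalone sketch (metric name and inputs are illustrative):

import evaluate

# Load a metric by name and score aligned lists of equal length.
metric = evaluate.load('accuracy')
print(metric.compute(predictions=[0, 1, 1], references=[0, 1, 0]))
# -> {'accuracy': 0.6666666666666666}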
@@ -103,7 +107,7 @@ class AccEvaluator(HuggingfaceEvaluator):
         Returns:
             dict: postprocessed scores.
         """
-        scores["accuracy"] *= 100
+        scores['accuracy'] *= 100
         return scores
@@ -150,7 +154,7 @@ class MccEvaluator(AccEvaluator):
         Returns:
             dict: postprocessed scores.
         """
-        scores["matthews_correlation"] *= 100
+        scores['matthews_correlation'] *= 100
         return scores
...
-from .icl_base_inferencer import BaseInferencer
-from .icl_gen_inferencer import GenInferencer
-from .icl_ppl_inferencer import PPLInferencer
-from .icl_clp_inferencer import CLPInferencer
+from .icl_base_inferencer import BaseInferencer  # noqa
+from .icl_clp_inferencer import CLPInferencer  # noqa
+from .icl_gen_inferencer import GenInferencer  # noqa
+from .icl_ppl_inferencer import PPLInferencer  # noqa
@@ -98,8 +98,8 @@ class CLPInferencer(BaseInferencer):
         # 3. Generate in-context examples for testing inputs
         for idx in range(len(ice_idx_list)):
             ice.append(
-                retriever.generate_ice(
-                    ice_idx_list[idx], ice_template=ice_template))
+                retriever.generate_ice(ice_idx_list[idx],
+                                       ice_template=ice_template))
         output_handler.save_ice(ice)

         # 4. Collect prompts and calculate conditional log probs
@@ -165,8 +165,7 @@ class CLPInferencer(BaseInferencer):
                 choice_target_ids.append(prompt_token_num - 1)

         logger.info('Calculating conditional log probability for prompts.')
-        for idx in trange(
-                0,
-                len(prompt_list),
-                self.batch_size,
-                disable=not self.is_main_process):
+        for idx in trange(0,
+                          len(prompt_list),
+                          self.batch_size,
+                          disable=not self.is_main_process):
...
@@ -25,7 +25,7 @@ class PPLInferencer(BaseInferencer):
         model (:obj:`BaseModel`, optional): The module to inference.
         max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
             the LM.
-        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`.
+        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
         output_json_filepath (:obj:`str`, optional): File path for output
             `JSON` file.
         output_json_filename (:obj:`str`, optional): File name for output
@@ -126,7 +126,7 @@ class PPLInferencer(BaseInferencer):
                 label,
                 ice_template=ice_template,
                 prompt_template=prompt_template)
-            prompt_token_num = self.model.get_token_len_from_template(
-                prompt, mode='ppl')  # noqa
+            prompt_token_num = self.model.get_token_len_from_template(  # noqa
+                prompt, mode='ppl')  # noqa
...
-from .icl_base_retriever import BaseRetriever
-from .icl_bm25_retriever import BM25Retriever
-from .icl_dpp_retriever import DPPRetriever
-from .icl_fix_k_retriever import FixKRetriever
-from .icl_mdl_retriever import MDLRetriever
-from .icl_random_retriever import RandomRetriever
-from .icl_topk_retriever import TopkRetriever
-from .icl_votek_retriever import VotekRetriever
-from .icl_zero_retriever import ZeroRetriever
+from .icl_base_retriever import BaseRetriever  # noqa
+from .icl_bm25_retriever import BM25Retriever  # noqa
+from .icl_dpp_retriever import DPPRetriever  # noqa
+from .icl_fix_k_retriever import FixKRetriever  # noqa
+from .icl_mdl_retriever import MDLRetriever  # noqa
+from .icl_random_retriever import RandomRetriever  # noqa
+from .icl_topk_retriever import TopkRetriever  # noqa
+from .icl_votek_retriever import VotekRetriever  # noqa
+from .icl_zero_retriever import ZeroRetriever  # noqa