Unverified commit 1e445417, authored by Tong Gao and committed by GitHub

[Enhancement] Test linting in CI and fix existing linting errors (#69)

* [Enhancement] Test linting in CI

* fix linting
parent 9a164489
-from .logging import *
+from .logging import *  # noqa
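(`# noqa` tells flake8 to skip checks on that line, silencing the F401/F403 warnings that a deliberate wildcard re-export would otherwise trigger.)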
import json
import os
import time

import numpy as np
import openai
import requests

OPENICL_API_NAME_LIST = ['opt-175b', 'gpt3']
OPENICL_API_PARAMETER_DICT = {
    'opt-175b': ['URL', 'headers'],
    'gpt3': [
        'engine', 'temperature', 'max_tokens', 'top_p', 'frequency_penalty',
        'presence_penalty', 'sleep_time'
    ]
}
OPENICL_API_REQUEST_CONFIG = {
    'opt-175b': {
        'URL': '',  # http://xxx/completions or http://xxx/generate
        'headers': {
            'Content-Type': 'application/json; charset=UTF-8'
        }
    },
    'gpt3': {
        'engine': 'text-davinci-003',
        'temperature': 0,
        'max_tokens': 256,
        'top_p': 1.0,
        'frequency_penalty': 0.0,
        'presence_penalty': 0.0,
        'sleep_time': 3
    }
}
PROXIES = {'https': '', 'http': ''}


def is_api_available(api_name):
    """Return True if ``api_name`` is a supported API backend."""
    if api_name is None:
        return False
    return api_name in OPENICL_API_NAME_LIST


def update_openicl_api_request_config(api_name, **kwargs):
    """Override the default request config of ``api_name`` with ``kwargs``."""
    if api_name is None or not is_api_available(api_name):
        return
    parameter_list = OPENICL_API_PARAMETER_DICT[api_name]
    for parameter in parameter_list:
        if parameter in kwargs:
            OPENICL_API_REQUEST_CONFIG[api_name][parameter] = kwargs[parameter]


def api_get_ppl(api_name, input_texts):
    """Return the average per-token negative log-likelihood of each text."""
    if api_name == 'opt-175b':
        payload = {'prompt': input_texts, 'max_tokens': 0, 'echo': True}
        response = json.loads(
            requests.post(
                OPENICL_API_REQUEST_CONFIG[api_name]['URL'],
                data=json.dumps(payload),
                headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'],
                proxies=PROXIES).text)
        lens = np.array(
            [len(r['logprobs']['tokens']) for r in response['choices']])
        ce_loss = np.array([
            -sum(r['logprobs']['token_logprobs']) for r in response['choices']
        ])
        return ce_loss / lens
    if api_name == 'gpt3':
        raise NotImplementedError("GPT-3 API doesn't support PPL calculation")


def api_get_tokens(api_name, input_texts):
    """Generate continuations; return (full texts, generated parts)."""
    length_list = [len(text) for text in input_texts]
    if api_name == 'opt-175b':
        payload = {'prompt': input_texts, 'max_tokens': 100, 'echo': True}
        response = json.loads(
            requests.post(
                OPENICL_API_REQUEST_CONFIG[api_name]['URL'],
                data=json.dumps(payload),
                headers=OPENICL_API_REQUEST_CONFIG[api_name]['headers'],
                proxies=PROXIES).text)
        # 'echo' makes the server return prompt + continuation, so slice
        # off the prompt to recover the generated part alone.
        return [r['text'] for r in response['choices']], [
            r['text'][length:]
            for r, length in zip(response['choices'], length_list)
        ]
    if api_name == 'gpt3':
        openai.api_key = os.getenv('OPENAI_API_KEY')
        response = openai.Completion.create(
            engine=OPENICL_API_REQUEST_CONFIG['gpt3']['engine'],
            prompt=input_texts,
            temperature=OPENICL_API_REQUEST_CONFIG['gpt3']['temperature'],
            max_tokens=OPENICL_API_REQUEST_CONFIG['gpt3']['max_tokens'],
            top_p=OPENICL_API_REQUEST_CONFIG['gpt3']['top_p'],
            frequency_penalty=OPENICL_API_REQUEST_CONFIG['gpt3']
            ['frequency_penalty'],
            presence_penalty=OPENICL_API_REQUEST_CONFIG['gpt3']
            ['presence_penalty'])
        # Throttle successive requests to stay under rate limits.
        time.sleep(OPENICL_API_REQUEST_CONFIG['gpt3']['sleep_time'])
        return [(text + r['text'])
                for r, text in zip(response['choices'], input_texts)
                ], [r['text'] for r in response['choices']]
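For orientation, a minimal sketch of how these helpers fit together, assuming it runs in this module's context; the URL below is a placeholder, not a real opt-175b deployment:

# Placeholder endpoint -- substitute a real deployment URL.
update_openicl_api_request_config('opt-175b',
                                  URL='http://localhost:8000/completions')

if is_api_available('opt-175b'):
    # Average negative log-likelihood per token for each input string.
    ppl = api_get_ppl('opt-175b', ['Paris is the capital of France.'])
    # Full texts plus just the generated continuations.
    full_texts, continuations = api_get_tokens('opt-175b',
                                               ['Once upon a time'])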
@@ -11,7 +11,7 @@ def _check_type_list(obj, typelist: List):
         elif isinstance(obj, _type):
             return obj
     raise TypeError(
-        f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'
+        f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'  # noqa
     )
@@ -20,7 +20,7 @@ def _check_dataset(obj) -> Union[Dataset, DatasetDict]:
         return obj
     else:
         raise TypeError(
-            f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'
+            f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'  # noqa
         )
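For context, the helper touched by the first hunk validates an object against a list of allowed types. A sketch consistent with the visible fragment (the None handling and the lines outside the hunk are assumptions, not the repository's exact code):

from typing import List


def _check_type_list(obj, typelist: List):
    # Sketch: accept obj if it matches any entry; a None entry means
    # "None is allowed". Details beyond the diff fragment are assumed.
    for _type in typelist:
        if _type is None:
            if obj is None:
                return obj
        elif isinstance(obj, _type):
            return obj
    raise TypeError(
        f'Expected an object in '
        f'{[_.__name__ if _ is not None else None for _ in typelist]} '
        f'type, but got {obj}')


# e.g. _check_type_list(3, [None, int, str]) returns 3, while
#      _check_type_list(3.0, [None, int, str]) raises TypeError.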
 accelerate>=0.19.0
+boto3
+colossalai
+cpm_kernels
 datasets>=2.12.0
 evaluate>=0.3.0
+fairscale
 faiss_gpu==1.7.2
+jieba
+mmengine
 nltk==3.8
 numpy==1.23.4
-openai==0.27.1
+openai
+pandas<2.0.0
 rank_bm25==0.2.2
 requests==2.28.1
 scikit_learn==1.2.1
 sentence_transformers==2.2.2
+tabulate
+tiktoken
+tokenizers>=0.13.3
 torch>=1.13.1
 tqdm==4.64.1
 transformers>=4.29.1
-openai
-mmengine
-jieba
-pandas<2.0.0
-cpm_kernels
-tokenizers>=0.13.3
-tabulate
-fairscale
-colossalai
-tabulate
-boto3
-tiktoken
\ No newline at end of file
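Since the point of this change is a sorted, duplicate-free requirements.txt, here is a quick local sanity check one could run; a sketch, not part of this commit's CI:

import re

with open('requirements.txt') as f:
    lines = [ln.strip() for ln in f if ln.strip()]

# Package name = everything before the first version operator.
names = [re.split(r'[<>=!]', ln, maxsplit=1)[0].lower() for ln in lines]
assert names == sorted(names), 'requirements.txt is not sorted'
assert len(names) == len(set(names)), 'duplicate requirement found'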
@@ -13,6 +13,7 @@ class DownloadNLTK(install):
 with open('README_zh-CN.md') as f:
     readme = f.read()
 
+
 def parse_requirements(fname='requirements.txt', with_version=True):
     """Parse the package dependencies listed in a requirements file but strips
     specific versioning information.
@@ -91,12 +92,14 @@ def parse_requirements(fname='requirements.txt', with_version=True):
     packages = list(gen_packages_items())
     return packages
 
+
 def get_version():
     version_file = 'opencompass/__init__.py'
     with open(version_file, 'r', encoding='utf-8') as f:
         exec(compile(f.read(), version_file, 'exec'))
     return locals()['__version__']
 
+
 def do_setup():
     setup(
         name='opencompass',
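The blank lines added in both hunks satisfy pycodestyle's E302 rule (two blank lines before a top-level definition), which the new CI lint step flags. As a hedged usage note, assuming parse_requirements behaves as its docstring says:

# Sketch: run in setup.py's context.
reqs = parse_requirements('requirements.txt', with_version=True)
# -> e.g. ['accelerate>=0.19.0', 'boto3', ...]
names = parse_requirements('requirements.txt', with_version=False)
# -> e.g. ['accelerate', 'boto3', ...]  (version specifiers stripped)
print(get_version())  # reads __version__ from opencompass/__init__.py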