Commit 2778a3d0 authored by luopl
Browse files

update to v0.9.1_stable

parent e92143e3
......@@ -7,6 +7,8 @@ data
docker
saves
hf_cache
ms_cache
om_cache
output
.dockerignore
.gitattributes
......
# Note: .env files are not actually supported; this file is for reference only
# api
API_HOST=0.0.0.0
API_PORT=8000
API_HOST=
API_PORT=
API_KEY=
API_MODEL_NAME=gpt-3.5-turbo
API_MODEL_NAME=
FASTAPI_ROOT_PATH=
MAX_CONCURRENT=
# general
DISABLE_VERSION_CHECK=
FORCE_CHECK_IMPORTS=
LLAMAFACTORY_VERBOSITY=
USE_MODELSCOPE_HUB=
USE_OPENMIND_HUB=
RECORD_VRAM=
# torchrun
FORCE_TORCHRUN=
MASTER_ADDR=
MASTER_PORT=
NNODES=
RANK=
NODE_RANK=
NPROC_PER_NODE=
# wandb
WANDB_DISABLED=
WANDB_PROJECT=huggingface
WANDB_PROJECT=
WANDB_API_KEY=
# gradio ui
GRADIO_SHARE=False
GRADIO_SERVER_NAME=0.0.0.0
GRADIO_SHARE=
GRADIO_SERVER_NAME=
GRADIO_SERVER_PORT=
GRADIO_ROOT_PATH=
GRADIO_IPV6=
# setup
ENABLE_SHORT_CONSOLE=1
# reserved (do not use)
......
......@@ -159,9 +159,13 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# vscode
.vscode/
# custom .gitignore
ms_cache/
hf_cache/
om_cache/
cache/
config/
saves/
......
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-ast
- id: check-added-large-files
args: ['--maxkb=25000']
- id: check-merge-conflict
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py38-plus]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
.PHONY: quality style test
.PHONY: build commit quality style test
check_dirs := scripts src tests setup.py
build:
pip install build && python -m build
commit:
pre-commit install
pre-commit run --all-files
quality:
ruff check $(check_dirs)
ruff format --check $(check_dirs)
......@@ -11,4 +18,4 @@ style:
ruff format $(check_dirs)
test:
CUDA_VISIBLE_DEVICES= pytest tests/
CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
assets/wechat.jpg

199 KB | W: | H:

assets/wechat.jpg

165 KB | W: | H:

assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
  • 2-up
  • Swipe
  • Onion skin
assets/wechat_npu.jpg

168 KB | W: | H:

assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
  • 2-up
  • Swipe
  • Onion skin
......@@ -17,9 +17,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
_LICENSE = "gpl-3.0"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
class BelleMultiturn(datasets.GeneratorBasedBuilder):
......@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
def _generate_examples(self, filepath: str):
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for key, row in enumerate(f):
data = json.loads(row)
conversations = []
......
......@@ -54,7 +54,8 @@
},
"alpaca_en": {
"hf_hub_url": "llamafactory/alpaca_en",
"ms_hub_url": "llamafactory/alpaca_en"
"ms_hub_url": "llamafactory/alpaca_en",
"om_hub_url": "HaM/alpaca_en"
},
"alpaca_zh": {
"hf_hub_url": "llamafactory/alpaca_zh",
......@@ -66,7 +67,8 @@
},
"alpaca_gpt4_zh": {
"hf_hub_url": "llamafactory/alpaca_gpt4_zh",
"ms_hub_url": "llamafactory/alpaca_gpt4_zh"
"ms_hub_url": "llamafactory/alpaca_gpt4_zh",
"om_hub_url": "State_Cloud/alpaca-gpt4-data-zh"
},
"glaive_toolcall_en": {
"hf_hub_url": "llamafactory/glaive_toolcall_en",
......
......@@ -8,9 +8,9 @@ import datasets
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = ""
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
_LICENSE = "mit"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
_URLS = {
"train": [
_URL + "harmless-base/train.jsonl.gz",
......@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]):
key = 0
for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for row in f:
data = json.loads(row)
chosen = data["chosen"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment