Commit 2778a3d0 authored by luopl's avatar luopl
Browse files

update to v0.9.1_stable

parent e92143e3
...@@ -7,6 +7,8 @@ data ...@@ -7,6 +7,8 @@ data
docker docker
saves saves
hf_cache hf_cache
ms_cache
om_cache
output output
.dockerignore .dockerignore
.gitattributes .gitattributes
......
# Note: actually we do not support .env, just for reference # Note: actually we do not support .env, just for reference
# api # api
API_HOST=0.0.0.0 API_HOST=
API_PORT=8000 API_PORT=
API_KEY= API_KEY=
API_MODEL_NAME=gpt-3.5-turbo API_MODEL_NAME=
FASTAPI_ROOT_PATH= FASTAPI_ROOT_PATH=
MAX_CONCURRENT=
# general # general
DISABLE_VERSION_CHECK= DISABLE_VERSION_CHECK=
FORCE_CHECK_IMPORTS= FORCE_CHECK_IMPORTS=
LLAMAFACTORY_VERBOSITY= LLAMAFACTORY_VERBOSITY=
USE_MODELSCOPE_HUB= USE_MODELSCOPE_HUB=
USE_OPENMIND_HUB=
RECORD_VRAM= RECORD_VRAM=
# torchrun # torchrun
FORCE_TORCHRUN= FORCE_TORCHRUN=
MASTER_ADDR= MASTER_ADDR=
MASTER_PORT= MASTER_PORT=
NNODES= NNODES=
RANK= NODE_RANK=
NPROC_PER_NODE= NPROC_PER_NODE=
# wandb # wandb
WANDB_DISABLED= WANDB_DISABLED=
WANDB_PROJECT=huggingface WANDB_PROJECT=
WANDB_API_KEY= WANDB_API_KEY=
# gradio ui # gradio ui
GRADIO_SHARE=False GRADIO_SHARE=
GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_NAME=
GRADIO_SERVER_PORT= GRADIO_SERVER_PORT=
GRADIO_ROOT_PATH= GRADIO_ROOT_PATH=
GRADIO_IPV6=
# setup # setup
ENABLE_SHORT_CONSOLE=1 ENABLE_SHORT_CONSOLE=1
# reserved (do not use) # reserved (do not use)
......
...@@ -159,9 +159,13 @@ cython_debug/ ...@@ -159,9 +159,13 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/ .idea/
# vscode
.vscode/
# custom .gitignore # custom .gitignore
ms_cache/ ms_cache/
hf_cache/ hf_cache/
om_cache/
cache/ cache/
config/ config/
saves/ saves/
......
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-ast
- id: check-added-large-files
args: ['--maxkb=25000']
- id: check-merge-conflict
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/asottile/pyupgrade
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py38-plus]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
.PHONY: quality style test .PHONY: build commit quality style test
check_dirs := scripts src tests setup.py check_dirs := scripts src tests setup.py
build:
pip install build && python -m build
commit:
pre-commit install
pre-commit run --all-files
quality: quality:
ruff check $(check_dirs) ruff check $(check_dirs)
ruff format --check $(check_dirs) ruff format --check $(check_dirs)
...@@ -11,4 +18,4 @@ style: ...@@ -11,4 +18,4 @@ style:
ruff format $(check_dirs) ruff format $(check_dirs)
test: test:
CUDA_VISIBLE_DEVICES= pytest tests/ CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
assets/wechat.jpg

199 KB | W: | H:

assets/wechat.jpg

165 KB | W: | H:

assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
assets/wechat.jpg
  • 2-up
  • Swipe
  • Onion skin
assets/wechat_npu.jpg

168 KB | W: | H:

assets/wechat_npu.jpg

167 KB | W: | H:

assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
assets/wechat_npu.jpg
  • 2-up
  • Swipe
  • Onion skin
...@@ -17,9 +17,9 @@ _CITATION = """\ ...@@ -17,9 +17,9 @@ _CITATION = """\
} }
""" """
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT) _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
_LICENSE = "gpl-3.0" _LICENSE = "gpl-3.0"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT) _URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
class BelleMultiturn(datasets.GeneratorBasedBuilder): class BelleMultiturn(datasets.GeneratorBasedBuilder):
...@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder): ...@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})] return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
def _generate_examples(self, filepath: str): def _generate_examples(self, filepath: str):
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, encoding="utf-8") as f:
for key, row in enumerate(f): for key, row in enumerate(f):
data = json.loads(row) data = json.loads(row)
conversations = [] conversations = []
......
...@@ -54,7 +54,8 @@ ...@@ -54,7 +54,8 @@
}, },
"alpaca_en": { "alpaca_en": {
"hf_hub_url": "llamafactory/alpaca_en", "hf_hub_url": "llamafactory/alpaca_en",
"ms_hub_url": "llamafactory/alpaca_en" "ms_hub_url": "llamafactory/alpaca_en",
"om_hub_url": "HaM/alpaca_en"
}, },
"alpaca_zh": { "alpaca_zh": {
"hf_hub_url": "llamafactory/alpaca_zh", "hf_hub_url": "llamafactory/alpaca_zh",
...@@ -66,7 +67,8 @@ ...@@ -66,7 +67,8 @@
}, },
"alpaca_gpt4_zh": { "alpaca_gpt4_zh": {
"hf_hub_url": "llamafactory/alpaca_gpt4_zh", "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
"ms_hub_url": "llamafactory/alpaca_gpt4_zh" "ms_hub_url": "llamafactory/alpaca_gpt4_zh",
"om_hub_url": "State_Cloud/alpaca-gpt4-data-zh"
}, },
"glaive_toolcall_en": { "glaive_toolcall_en": {
"hf_hub_url": "llamafactory/glaive_toolcall_en", "hf_hub_url": "llamafactory/glaive_toolcall_en",
......
...@@ -8,9 +8,9 @@ import datasets ...@@ -8,9 +8,9 @@ import datasets
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co") _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_DESCRIPTION = "Human preference data about helpfulness and harmlessness." _DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = "" _CITATION = ""
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT) _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
_LICENSE = "mit" _LICENSE = "mit"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT) _URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
_URLS = { _URLS = {
"train": [ "train": [
_URL + "harmless-base/train.jsonl.gz", _URL + "harmless-base/train.jsonl.gz",
...@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder): ...@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]): def _generate_examples(self, filepaths: List[str]):
key = 0 key = 0
for filepath in filepaths: for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, encoding="utf-8") as f:
for row in f: for row in f:
data = json.loads(row) data = json.loads(row)
chosen = data["chosen"] chosen = data["chosen"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment