Commit 2778a3d0 authored by luopl
Browse files

Update to v0.9.1_stable

parent e92143e3
...@@ -20,9 +20,9 @@ _CITATION = """\ ...@@ -20,9 +20,9 @@ _CITATION = """\
} }
""" """
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT) _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
_LICENSE = "cc-by-nc-4.0" _LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT) _BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
class UltraChat(datasets.GeneratorBasedBuilder): class UltraChat(datasets.GeneratorBasedBuilder):
...@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder): ...@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]): def _generate_examples(self, filepaths: List[str]):
for filepath in filepaths: for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, encoding="utf-8") as f:
for row in f: for row in f:
try: try:
data = json.loads(row) data = json.loads(row)
......
# Use the NVIDIA official image with PyTorch 2.3.0 # Default use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
FROM nvcr.io/nvidia/pytorch:24.02-py3 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
FROM ${BASE_IMAGE}
# Define environments # Define environments
ENV MAX_JOBS=4 ENV MAX_JOBS=4
......
...@@ -16,6 +16,7 @@ services: ...@@ -16,6 +16,7 @@ services:
volumes: volumes:
- ../../hf_cache:/root/.cache/huggingface - ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope - ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data - ../../data:/app/data
- ../../output:/app/output - ../../output:/app/output
ports: ports:
...@@ -23,6 +24,7 @@ services: ...@@ -23,6 +24,7 @@ services:
- "8000:8000" - "8000:8000"
ipc: host ipc: host
tty: true tty: true
shm_size: '16gb'
stdin_open: true stdin_open: true
command: bash command: bash
deploy: deploy:
......
...@@ -10,6 +10,7 @@ services: ...@@ -10,6 +10,7 @@ services:
volumes: volumes:
- ../../hf_cache:/root/.cache/huggingface - ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope - ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data - ../../data:/app/data
- ../../output:/app/output - ../../output:/app/output
- /usr/local/dcmi:/usr/local/dcmi - /usr/local/dcmi:/usr/local/dcmi
...@@ -21,6 +22,7 @@ services: ...@@ -21,6 +22,7 @@ services:
- "8000:8000" - "8000:8000"
ipc: host ipc: host
tty: true tty: true
shm_size: '16gb'
stdin_open: true stdin_open: true
command: bash command: bash
devices: devices:
......
...@@ -15,6 +15,7 @@ services: ...@@ -15,6 +15,7 @@ services:
volumes: volumes:
- ../../hf_cache:/root/.cache/huggingface - ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope - ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data - ../../data:/app/data
- ../../output:/app/output - ../../output:/app/output
- ../../saves:/app/saves - ../../saves:/app/saves
...@@ -23,6 +24,7 @@ services: ...@@ -23,6 +24,7 @@ services:
- "8000:8000" - "8000:8000"
ipc: host ipc: host
tty: true tty: true
shm_size: '16gb'
stdin_open: true stdin_open: true
command: bash command: bash
devices: devices:
......
...@@ -158,5 +158,4 @@ class MMLU(datasets.GeneratorBasedBuilder): ...@@ -158,5 +158,4 @@ class MMLU(datasets.GeneratorBasedBuilder):
df = pd.read_csv(filepath, header=None) df = pd.read_csv(filepath, header=None)
df.columns = ["question", "A", "B", "C", "D", "answer"] df.columns = ["question", "A", "B", "C", "D", "answer"]
for i, instance in enumerate(df.to_dict(orient="records")): yield from enumerate(df.to_dict(orient="records"))
yield i, instance
...@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ...@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### Supervised Fine-Tuning on Multiple Nodes #### Supervised Fine-Tuning on Multiple Nodes
```bash ```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
``` ```
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
......
...@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ...@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### 多机指令监督微调 #### 多机指令监督微调
```bash ```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
``` ```
#### 使用 DeepSpeed ZeRO-3 平均分配显存 #### 使用 DeepSpeed ZeRO-3 平均分配显存
......
...@@ -10,7 +10,7 @@ use_adam_mini: true ...@@ -10,7 +10,7 @@ use_adam_mini: true
### dataset ### dataset
dataset: identity,alpaca_en_demo dataset: identity,alpaca_en_demo
template: qwen template: qwen
cutoff_len: 1024 cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment