Commit 2778a3d0 authored by luopl

update to v0.9.1_stable

parent e92143e3
@@ -137,4 +137,4 @@
       "mllm_demo_data/3.jpg"
     ]
   }
-]
\ No newline at end of file
+]
@@ -44,4 +44,4 @@
       "mllm_demo_data/3.mp4"
     ]
   }
-]
\ No newline at end of file
+]
@@ -20,9 +20,9 @@ _CITATION = """\
 }
 """
-_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
+_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
 _LICENSE = "cc-by-nc-4.0"
-_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
+_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
 class UltraChat(datasets.GeneratorBasedBuilder):
@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
     def _generate_examples(self, filepaths: List[str]):
         for filepath in filepaths:
-            with open(filepath, "r", encoding="utf-8") as f:
+            with open(filepath, encoding="utf-8") as f:
                 for row in f:
                     try:
                         data = json.loads(row)
...
This source diff could not be displayed because it is too large.
-# Use the NVIDIA official image with PyTorch 2.3.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
-FROM nvcr.io/nvidia/pytorch:24.02-py3
+# Default use the NVIDIA official image with PyTorch 2.3.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
+FROM ${BASE_IMAGE}

 # Define environments
 ENV MAX_JOBS=4
...
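Exposing the base image as a build argument lets the NGC tag be swapped at build time instead of editing the Dockerfile. A minimal usage sketch, run from the directory containing the Dockerfile (the alternative tag and image names below are illustrative, not values from this commit):

```bash
# Build with the default NGC PyTorch 24.02 base image
docker build -t llamafactory:latest .

# Or override the base image at build time, e.g. to try a different NGC release
docker build --build-arg BASE_IMAGE=nvcr.io/nvidia/pytorch:24.05-py3 -t llamafactory:custom .
```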
@@ -16,6 +16,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
     ports:
@@ -23,6 +24,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
+    shm_size: '16gb'
     stdin_open: true
     command: bash
     deploy:
...
@@ -10,6 +10,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
       - /usr/local/dcmi:/usr/local/dcmi
@@ -21,6 +22,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
+    shm_size: '16gb'
     stdin_open: true
     command: bash
     devices:
...
@@ -15,6 +15,7 @@ services:
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
+      - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
       - ../../saves:/app/saves
@@ -23,6 +24,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
+    shm_size: '16gb'
     stdin_open: true
     command: bash
     devices:
...
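All three compose files gain the same two additions: an openmind cache directory mounted next to the existing Hugging Face and ModelScope caches, and an explicit 16 GB shared-memory size. The larger /dev/shm matters because PyTorch DataLoader workers exchange tensors through shared memory, and Docker's 64 MB default is easily exhausted during training. A hedged usage sketch (the compose directory and the service name "llamafactory" are assumptions; adjust to the actual repository layout):

```bash
# Bring the CUDA container up with the updated compose file
cd docker/docker-cuda
docker compose up -d --build

# Verify the enlarged shared-memory mount from inside the container
docker compose exec llamafactory df -h /dev/shm
```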
@@ -207,4 +207,4 @@
     "name": "兽医学",
     "category": "STEM"
   }
-}
\ No newline at end of file
+}
@@ -267,4 +267,4 @@
     "name": "世界宗教",
     "category": "Humanities"
   }
-}
\ No newline at end of file
+}
@@ -227,4 +227,4 @@
     "name": "world religions",
     "category": "Humanities"
   }
-}
\ No newline at end of file
+}
@@ -158,5 +158,4 @@ class MMLU(datasets.GeneratorBasedBuilder):
         df = pd.read_csv(filepath, header=None)
         df.columns = ["question", "A", "B", "C", "D", "answer"]
-        for i, instance in enumerate(df.to_dict(orient="records")):
-            yield i, instance
+        yield from enumerate(df.to_dict(orient="records"))
@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
 #### Supervised Fine-Tuning on Multiple Nodes

 ```bash
-FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
-FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
 ```

 #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
...
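Renaming RANK to NODE_RANK matches torchrun's own terminology: NODE_RANK identifies the machine, while RANK is the global rank torchrun assigns to each worker process. With FORCE_TORCHRUN=1 the CLI launches training through torchrun, so the two commands above correspond roughly to direct torchrun invocations like the sketch below (the script path and per-node GPU count are assumptions for illustration, not taken from this diff):

```bash
# On the first node (node rank 0, also hosting the rendezvous master)
torchrun --nnodes 2 --node_rank 0 --nproc_per_node 8 \
  --master_addr 192.168.0.1 --master_port 29500 \
  src/train.py examples/train_lora/llama3_lora_sft.yaml

# On the second node, only the node rank changes
torchrun --nnodes 2 --node_rank 1 --nproc_per_node 8 \
  --master_addr 192.168.0.1 --master_port 29500 \
  src/train.py examples/train_lora/llama3_lora_sft.yaml
```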
@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
 #### Multi-Node Supervised Fine-Tuning

 ```bash
-FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
-FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
 ```

 #### Evenly Distributing VRAM with DeepSpeed ZeRO-3
...
@@ -25,4 +25,4 @@
     "contiguous_gradients": true,
     "round_robin_gradients": true
   }
-}
\ No newline at end of file
+}
@@ -25,4 +25,4 @@
     "contiguous_gradients": true,
     "round_robin_gradients": true
   }
-}
\ No newline at end of file
+}
@@ -29,4 +29,4 @@
     "contiguous_gradients": true,
     "round_robin_gradients": true
   }
-}
\ No newline at end of file
+}
@@ -27,4 +27,4 @@
     "stage3_max_reuse_distance": 1e9,
     "stage3_gather_16bit_weights_on_model_save": true
   }
-}
\ No newline at end of file
+}
@@ -35,4 +35,4 @@
     "stage3_max_reuse_distance": 1e9,
     "stage3_gather_16bit_weights_on_model_save": true
   }
-}
\ No newline at end of file
+}
@@ -10,7 +10,7 @@ use_adam_mini: true
 ### dataset
 dataset: identity,alpaca_en_demo
 template: qwen
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
...
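The only functional change in this Adam-mini example config is the longer cutoff length: samples are now truncated at 2048 tokens instead of 1024, doubling the usable context per example at the cost of extra activation memory. Launching the example works as before; a sketch assuming the config lives under examples/extras/ (the exact file path is not shown in this diff):

```bash
# Run SFT with the Adam-mini optimizer example config (path is an assumption)
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```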