Commit 2778a3d0 authored by luopl

update to v0.9.1_stable

parent e92143e3
......@@ -137,4 +137,4 @@
"mllm_demo_data/3.jpg"
]
}
]
\ No newline at end of file
]
......@@ -44,4 +44,4 @@
"mllm_demo_data/3.mp4"
]
}
]
\ No newline at end of file
]
......@@ -20,9 +20,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
_LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
class UltraChat(datasets.GeneratorBasedBuilder):
......@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]):
for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for row in f:
try:
data = json.loads(row)
......
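Note on the rewrite above: inside an f-string the doubled braces escape to a literal `{idx}`, so `_BASE_DATA_URL` still ends up as a template that the builder can later fill per shard with `str.format(idx=...)`; dropping the `"r"` argument from `open()` changes nothing, since read mode is the default.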
This source diff could not be displayed because it is too large.
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3
# Default use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
FROM ${BASE_IMAGE}
# Define environments
ENV MAX_JOBS=4
......
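With the base image now exposed as a build argument, it can be swapped without editing the Dockerfile. A minimal sketch of an override at build time, assuming the CUDA Dockerfile path `docker/docker-cuda/Dockerfile` and the image tag `llamafactory:latest` (both are assumptions, not taken from this diff):

```bash
# Build against the default NGC PyTorch 24.02 image; point BASE_IMAGE elsewhere to change it.
docker build \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3 \
  -f docker/docker-cuda/Dockerfile \
  -t llamafactory:latest .
```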
......@@ -16,6 +16,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
ports:
......@@ -23,6 +24,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
deploy:
......
......@@ -10,6 +10,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- /usr/local/dcmi:/usr/local/dcmi
......@@ -21,6 +22,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
devices:
......
......@@ -15,6 +15,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- ../../saves:/app/saves
......@@ -23,6 +24,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
devices:
......
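All three Compose files gain the `om_cache` mount and an explicit `shm_size`, which take effect whenever the stack is brought up through Compose. A rough usage sketch, assuming the compose file lives under `docker/docker-cuda` and the service is named `llamafactory` (both assumptions):

```bash
# Start the container in the background, then open an interactive shell in it.
cd docker/docker-cuda
docker compose up -d
docker compose exec llamafactory bash
```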
......@@ -207,4 +207,4 @@
"name": "兽医学",
"category": "STEM"
}
}
\ No newline at end of file
}
......@@ -267,4 +267,4 @@
"name": "世界宗教",
"category": "Humanities"
}
}
\ No newline at end of file
}
......@@ -227,4 +227,4 @@
"name": "world religions",
"category": "Humanities"
}
}
\ No newline at end of file
}
......@@ -158,5 +158,4 @@ class MMLU(datasets.GeneratorBasedBuilder):
df = pd.read_csv(filepath, header=None)
df.columns = ["question", "A", "B", "C", "D", "answer"]
for i, instance in enumerate(df.to_dict(orient="records")):
yield i, instance
yield from enumerate(df.to_dict(orient="records"))
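The `yield from enumerate(...)` form emits the same `(index, record)` pairs as the loop it replaces; the change is a pure simplification with no behavioral difference.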
......@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### Supervised Fine-Tuning on Multiple Nodes
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
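Here `NODE_RANK` identifies the machine rather than a process: it should be 0 on the node reachable at `MASTER_ADDR` and 1 on the other node, with the rest of the command identical on both.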
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
......
......@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### Supervised Fine-Tuning on Multiple Nodes
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### Distributing VRAM Evenly with DeepSpeed ZeRO-3
......
......@@ -25,4 +25,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -25,4 +25,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -29,4 +29,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -27,4 +27,4 @@
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
\ No newline at end of file
}
......@@ -35,4 +35,4 @@
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
\ No newline at end of file
}
......@@ -10,7 +10,7 @@ use_adam_mini: true
### dataset
dataset: identity,alpaca_en_demo
template: qwen
cutoff_len: 1024
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
......
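The Adam-mini example config above (now with `cutoff_len: 2048`) is launched like any other config file; a sketch assuming it lives at `examples/extras/adam_mini/qwen2_full_sft.yaml` (the exact path is an assumption):

```bash
# Run supervised fine-tuning with the Adam-mini optimizer settings shown above.
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```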