Commit 2778a3d0 authored by luopl

update to v0.9.1_stable

parent e92143e3
......@@ -137,4 +137,4 @@
"mllm_demo_data/3.jpg"
]
}
]
\ No newline at end of file
]
......@@ -44,4 +44,4 @@
"mllm_demo_data/3.mp4"
]
}
]
\ No newline at end of file
]
......@@ -20,9 +20,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
_LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
class UltraChat(datasets.GeneratorBasedBuilder):
......@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]):
for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for row in f:
try:
data = json.loads(row)
......
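Note on the rewrite above: inside an f-string the doubled braces escape to a literal `{idx}`, so `_BASE_DATA_URL` still ends up as a template that the builder can later fill per shard with `str.format(idx=...)`; dropping the `"r"` argument from `open()` changes nothing, since read mode is the default.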
This source diff could not be displayed because it is too large.
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3
# Default use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
FROM ${BASE_IMAGE}
# Define environments
ENV MAX_JOBS=4
......
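With the base image now exposed as a build argument, it can be swapped without editing the Dockerfile. A minimal sketch of an override at build time, assuming the CUDA Dockerfile path `docker/docker-cuda/Dockerfile` and the image tag `llamafactory:latest` (both are assumptions, not taken from this diff):

```bash
# Build against the default NGC PyTorch 24.02 image; point BASE_IMAGE elsewhere to change it.
docker build \
  --build-arg BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3 \
  -f docker/docker-cuda/Dockerfile \
  -t llamafactory:latest .
```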
......@@ -16,6 +16,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
ports:
......@@ -23,6 +24,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
deploy:
......
......@@ -10,6 +10,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- /usr/local/dcmi:/usr/local/dcmi
......@@ -21,6 +22,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
devices:
......
......@@ -15,6 +15,7 @@ services:
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- ../../saves:/app/saves
......@@ -23,6 +24,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: '16gb'
stdin_open: true
command: bash
devices:
......
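All three Compose files gain the `om_cache` mount and an explicit `shm_size`, which take effect whenever the stack is brought up through Compose. A rough usage sketch, assuming the compose file lives under `docker/docker-cuda` and the service is named `llamafactory` (both assumptions):

```bash
# Start the container in the background, then open an interactive shell in it.
cd docker/docker-cuda
docker compose up -d
docker compose exec llamafactory bash
```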
......@@ -207,4 +207,4 @@
"name": "兽医学",
"category": "STEM"
}
}
\ No newline at end of file
}
......@@ -267,4 +267,4 @@
"name": "世界宗教",
"category": "Humanities"
}
}
\ No newline at end of file
}
......@@ -227,4 +227,4 @@
"name": "world religions",
"category": "Humanities"
}
}
\ No newline at end of file
}
......@@ -158,5 +158,4 @@ class MMLU(datasets.GeneratorBasedBuilder):
df = pd.read_csv(filepath, header=None)
df.columns = ["question", "A", "B", "C", "D", "answer"]
for i, instance in enumerate(df.to_dict(orient="records")):
yield i, instance
yield from enumerate(df.to_dict(orient="records"))
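The `yield from enumerate(...)` form emits the same `(index, record)` pairs as the loop it replaces; the change is a pure simplification with no behavioral difference.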
......@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### Supervised Fine-Tuning on Multiple Nodes
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
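Here `NODE_RANK` identifies the machine rather than a process: it should be 0 on the node reachable at `MASTER_ADDR` and 1 on the other node, with the rest of the command identical on both.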
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
......
......@@ -89,8 +89,8 @@ llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
#### Supervised Fine-Tuning on Multiple Nodes
```bash
FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
#### Distributing VRAM Evenly with DeepSpeed ZeRO-3
......
......@@ -25,4 +25,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -25,4 +25,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -29,4 +29,4 @@
"contiguous_gradients": true,
"round_robin_gradients": true
}
}
\ No newline at end of file
}
......@@ -27,4 +27,4 @@
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
\ No newline at end of file
}
......@@ -35,4 +35,4 @@
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
\ No newline at end of file
}
......@@ -10,7 +10,7 @@ use_adam_mini: true
### dataset
dataset: identity,alpaca_en_demo
template: qwen
cutoff_len: 1024
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
......
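The Adam-mini example config above (now with `cutoff_len: 2048`) is launched like any other config file; a sketch assuming it lives at `examples/extras/adam_mini/qwen2_full_sft.yaml` (the exact path is an assumption):

```bash
# Run supervised fine-tuning with the Adam-mini optimizer settings shown above.
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```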